In [1]:
import geopandas as gpd
import pandas as pd
from prettytable import PrettyTable
import os
import numpy as np
import matplotlib.pyplot as plt
from shapely.geometry import Point, LineString, MultiLineString
import math
from tabulate import tabulate
import networkx as nx


In [None]:
class Shapefile:
    def __init__(self, datasetNumber, inputFolder, outputFolder):
        # Initialize the Shapefile class with the given dataset name
        
        # Call the constructor of the base object class
        object.__init__(self)
        
        self.dataset_number = datasetNumber
        self.input_folder = inputFolder
        self.output_folder = outputFolder

        self.initVariables()


        self.min_diameter_m = 0  # Minimum diameter (initialized to 0)


    def initVariables(self):
        dataset_folders = {
            1: "1402", 2: "200213_3M_data", 3: "1704/M3M", 4: "200211_Veolia_M3M", 5: "3M", 6: "3MNEW"
        }
        self.dataset = dataset_folders.get(self.dataset_number)

        self.nodes_path = f"Data SIG/{self.dataset}/{self.input_folder}/Nodes.shp"
        self.pipes_path = f"Data SIG/{self.dataset}/{self.input_folder}/Pipes.shp"

        self.points_components = {
            "Manholes", "Pumps", "Fittings", "Structures", "TreatmentPlant", 
            "Accessories", "Appariel", "Deversoir", "PosteRefoulement"
        }
        self.components_points_gdf = {}
        self.attributes_components = {}

        self.previousIdsNodes = {1:"NUMERO", 2:"objectid", 3:"MSLINK", 4:"ID_NODE", 5:"idouvrage" , 6:"idouvrage"}
        self.previousIdsPipes = {1:"NUMERO", 2:"numero", 3:"MSLINK", 4:"ID_ARC", 5:"idcana", 6:"idcana"}
        self.idColumunNodes = self.previousIdsNodes[self.dataset_number]
        self.idColumunPipes = self.previousIdsPipes[self.dataset_number]

        self.bufferPath = "Data SIG/Buffer.shp"

        self.pipesPathUpdated = "Data SIG/" + self.dataset + "/PipesWithIds.shp"  # Path to the pipes shapefile
        self.dummyPath = "Data SIG/" + self.dataset + "/Dummy.shp"  # Path to the dummy shapefile



    def find_all_components_points_path(self, comments = False):
        self.components_points_paths = []
        '''
        This function searches for all the shapefiles in the dataset folder that belong to points components.
        '''
        folder_path = "Data SIG/"+self.dataset+"/"+self.input_folder
        for filename in os.listdir(folder_path):
            if filename.endswith(".shp") and os.path.splitext(filename)[0] in self.points_components:
                self.components_points_paths.append(os.path.join(folder_path, filename))

        if comments:
            print("Shapefiles paths representing nodes components".center(100,"*"))
            for component in self.components_points_paths:
                print(f'Path : {component}')
        return True
    

    def read_all_components_points_paths(self, comments = False):
        if comments:
            print("Opening databases".center(100,"*"))
        for path in self.components_points_paths:
            gdf = gpd.read_file(path)
            # Extract the file name without the extension and create a new 'source_1' attribute
            filename = path.split("/")[-1].split(".")[0]
            self.components_points_gdf[filename] = gdf
            if comments:
                print(f'The file {path} is correctly opned')

    def get_attributes_components_points(self, comments = False):
        '''
        This function retrieves attributes for each component and stores them in the attributes_components dictionary.
        '''

        for component, gdf in self.components_points_gdf.items():
            attributes = list(gdf.columns)
            '''attributes.append("source_1")
            attributes.append("id")'''
            self.attributes_components[component] = attributes
        if comments:
            table = PrettyTable()
            table.field_names = ["Component", "Attributes"]
            for component, attributes in self.attributes_components.items():
                table.add_row([component, ", ".join(attributes)])
            print(table)

        return True 
    
    

    '''def remove_id_attribute_from_components_points(self, comments = False):
        if comments:
            print(f'Romoving id attribute from components points'.center(100,"*"))
        for component, gdf in self.components_points_gdf.items():
            if self.idColumunNodes in gdf.columns:
                gdf.drop(columns=self.idColumunNodes, inplace=True)
                if comments:
                    print(f"The id column '{self.idColumunNodes}' is removed from ", component)
            else:
                if comments:
                    print(f"The id column '{self.idColumunNodes}' is not found in : ", component)'''
        
    
    def read_shapefile(self, file):
        if file == "Nodes":
            self.gdfNodes = gpd.read_file(self.nodes_path)
            print(f'The shapefile {self.nodes_path} is opned')
        elif file == "Pipes":
            self.gdfPipes = gpd.read_file(self.pipes_path)
            print(f'The shapefile {self.pipes_path} is opned')
        elif file == "Buffers":
            self.gdfBuffers = gpd.read_file(self.bufferPath)
            print(f'The shapefile {self.bufferPath} is opned')
        else:
            print('The file name must be Nodes, Pipes or buffers')

    def print_shapefile_info(self, file):
        if file == "Nodes":
            gdf = self.gdfNodes
        elif file == "Pipes":
            gdf = self.gdfPipes
        elif file == "Buffers":
            gdf = self.gdfBuffers

        print("Shapefile Information:")
        print(f"Number of rows: {len(gdf)}")
        print(f"CRS (Coordinate Reference System): {gdf.crs}")
        
        # Print column names and types
        print("\nColumns:")
        for column in gdf.columns:
            print(f"{column}: {gdf[column].dtype}")

        # Print the first few rows of the GeoDataFrame
        print("\nFirst 5 rows:")
        print(gdf.head())
        
    def add_unique_id_attribute(self,file, minValue=1 ):
        '''
        Adds unique IDs to shapefile layers.
        '''
        if file == "Nodes":
            if 'id' in self.gdfNodes.columns:
                self.gdfNodes.drop(columns=['id'], inplace=True)
            self.gdfNodes['id'] = range(minValue, minValue + len(self.gdfNodes))
        elif file == "Pipes":
            if 'id' in self.gdfPipes.columns:
                self.gdfPipes.drop(columns=['id'], inplace=True)
            self.gdfPipes['id'] = range(minValue, minValue + len(self.gdfPipes))
        elif file == "Buffers":
            if 'id' in self.gdfBuffers.columns:
                self.gdfBuffers.drop(columns=['id'], inplace=True)
            self.gdfBuffers['id'] = range(minValue, minValue + len(self.gdfBuffers))
        else:
            print('The file name must be Nodes, Pipes or buffers')




        

    def get_node_attributes(self, typeNode, idObject):
        attribute_list = self.attributes_components[typeNode]
        row_with_desired_id = self.gdfNodes.loc[self.gdfNodes['id'] == idObject]

        attributes = {}

        if not row_with_desired_id.empty:
            specific_row = row_with_desired_id.iloc[0]
            for attribute in attribute_list:
                if attribute in specific_row:
                    attributes[attribute] = specific_row[attribute]

        geometry = self.parse_geometry(attributes['geometry'])

        properties = {
            key: int(value) if isinstance(value, np.int64) else value
            for key, value in attributes.items() if key != 'geometry'
        }
        feature = {
            "type": "Feature",
            "geometry": geometry,
            "properties": properties
        }
        return feature
    
    def parse_geometry(self, geometry_point):
        if geometry_point.geom_type == 'Point':
            longitude, latitude = geometry_point.coords[0]
            return {
                "type": "Point",
                "coordinates": [longitude, latitude]
            }
        return None

    def get_pipe_attributes(self, idObject):
        row_with_desired_id = self.gdfPipes.loc[self.gdfPipes['id'] == idObject]
        attributes = {}
        
        for attr, value in row_with_desired_id.items():
            # Convert np.int64 to int
            if isinstance(value.values[0], np.int64):
                value = int(value.values[0])
            else:
                value = value.values[0]
            
            # Only include attribute if it's not NaN
            if not pd.isna(value) and attr != 'geometry':
                attributes[attr] = value
        
        # Extract the first geometry (index 0)
        geometry = row_with_desired_id['geometry'].values[0]
        
        # Convert the geometry to a GeoJSON-like dictionary if needed
        coords1 = []
        if geometry.geom_type == "LineString":
            coords1 = list(geometry.coords)
        elif geometry.geom_type == "MultiLineString":
            for line_string in geometry.geoms:
                coords1.append(list(line_string.coords))
        else:
            coords1 = []
        geometry_dict = {
            "type": geometry.geom_type,
            "coordinates": coords1
        }
        
        return {
            "type": "Feature",
            "geometry": geometry_dict,
            "properties": attributes
        }

    
    def describe_shapefiles(self):
        """Print the edge and node totals and a per-type node count table.

        Nodes are grouped by their ``source_1`` column; the resulting
        ``{type: count}`` mapping is kept in ``self.counts_dict``.
        """
        edge_total = len(self.gdfPipes)
        node_total = len(self.gdfNodes)
        print("The total number of edges is :", edge_total)
        print("The total number of nodes is :", node_total)

        # Number of nodes per value of the source column, as a plain dict.
        self.counts_dict = self.gdfNodes.groupby("source_1").size().to_dict()

        summary = PrettyTable()
        summary.field_names = ["Node Type", "Count"]
        for entry in self.counts_dict.items():
            summary.add_row(list(entry))
        print(summary)

    def get_diameter_pipe(self, pipe_id):
        pipe_row = self.gdfPipes[self.gdfPipes['id'] == pipe_id]
        
        if not pipe_row.empty:
            diameter_cm = pipe_row['diametre'].iloc[0]
            
            # Convert to meters if diameter is not null or 0
            if diameter_cm is not None and diameter_cm != 0:
                return diameter_cm
        return None
    
    def get_min_diameter_pipes(self):
        filtered_diameters_cm = self.gdfPipes['diametre'][
            (self.gdfPipes['diametre'].notnull()) & (self.gdfPipes['diametre'] != 0)
        ]
        
        if not filtered_diameters_cm.empty:
            min_diameter_cm = filtered_diameters_cm.min()
            self.min_diameter_m = min_diameter_cm / 1000.0
            return self.min_diameter_m
        
        return None
    
    

    def insert_buffer(self, new_geometry):
        """Append one geometry to the buffers layer and write it to disk.

        The existing rows of ``self.gdfBuffers`` are kept; the new geometry
        is added as an extra row and the whole layer is saved to
        ``self.bufferPath``.

        Args:
            new_geometry: Geometry object to append.
        """
        # Give the new row the layer's CRS so the concatenation does not mix
        # a CRS-less frame with one that has a CRS.
        new_row = gpd.GeoDataFrame(
            {'geometry': [new_geometry]},
            crs=getattr(self.gdfBuffers, 'crs', None),
        )
        self.gdfBuffers = pd.concat([self.gdfBuffers, new_row], ignore_index=True)

        # Save the GeoDataFrame to a shapefile
        self.gdfBuffers.to_file(self.bufferPath, driver='ESRI Shapefile')

    '''def convert_dimensions_to_double_and_rename(self):
        # Extract numeric values from 'dimensions' column and convert to float
        self.gdfPipes['diametre'] = self.gdfPipes['DIMENSIONS'].str.extract('(\d+)').astype(float)
        
        # Drop the original 'dimensions' column
        self.gdfPipes.drop(columns=['DIMENSIONS'], inplace=True)

    def convert_diametre_to_double(self):
        self.gdfPipes['diametre'] = self.gdfPipes['diametre'].str.extract('(\d+)').astype(float)'''

    def save_shapefile(self, file):
        if file == "Nodes":
            if os.path.exists(self.nodes_path):
                os.remove(self.nodes_path)
            self.gdfNodes.to_file(self.nodes_path, driver='ESRI Shapefile')
        elif file == "Pipes":
            if os.path.exists(self.pipesPathUpdated):
                os.remove(self.pipesPathUpdated)
            self.gdfPipes.to_file(self.pipesPathUpdated, driver='ESRI Shapefile')
        elif file == "Buffers":
            if os.path.exists(self.bufferPath):
                os.remove(self.bufferPath)
            self.gdfBuffers.to_file(self.bufferPath, driver='ESRI Shapefile')
        else:
            print('The file name must be Nodes, Pipes or buffers')

    def get_connected_components_pipes(self):
        """Count the connected components of the pipe network.

        A fresh undirected graph is stored in ``self.graphShapeFile``. Every
        pipe whose geometry is a ``LineString`` contributes one edge between
        its first and last coordinate; other geometries are ignored.

        Returns:
            The number of connected components of that graph.
        """
        network = nx.Graph()
        self.graphShapeFile = network

        for geometry in self.gdfPipes['geometry']:
            if not isinstance(geometry, LineString):
                continue
            # add_edge creates both endpoint nodes if they do not exist yet.
            network.add_edge(geometry.coords[0], geometry.coords[-1])

        return nx.number_connected_components(network)