CSV to GeoJSON

In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString
import ast  # To safely evaluate the string representation of lists

# Locations of the source table and of the GeoJSON that will be produced
csv_path = r"C:/Users/hie/pe_tan/pypsa-earth/data/custom_lines.csv"  # Input CSV file
geojson_path = r"C:/Users/hie/pe_tan/pypsa-earth/data/custom_lines.geojson"  # Output GeoJSON file

# Read the semicolon-separated table; 'line_id' is forced to text so pandas
# does not reinterpret the identifiers as numbers
df = pd.read_csv(csv_path, sep=";", encoding="utf-8", dtype={"line_id": str})

# Fail early when the column the geometries are built from is absent
if "coordinates" not in df:
    raise ValueError("The CSV file must contain a 'coordinates' column.")

# Parse the 'coordinates' column and create LineString geometries
def parse_coordinates(coordinates_str):
    """Turn the text form of a coordinate list into a LineString.

    The text is evaluated with ``ast.literal_eval`` (literals only, no code
    execution) and the result is handed to ``LineString``.

    Parameters
    ----------
    coordinates_str
        Cell value from the 'coordinates' column, expected to be the string
        representation of a list of coordinate tuples.

    Returns
    -------
    LineString or None
        The geometry, or ``None`` when evaluation or construction raised.
        In that case the offending value and the error are printed instead of
        aborting the whole conversion.
    """
    geometry = None
    try:
        points = ast.literal_eval(coordinates_str)
        geometry = LineString(points)
    except Exception as e:
        # Best-effort: report the bad row and carry on with a missing geometry
        print(f"Error parsing 'coordinates': {coordinates_str} - {e}")
    return geometry

# Build one geometry per row from the textual coordinate lists
df["geometry"] = df["coordinates"].map(parse_coordinates)

# The raw text column is no longer needed once the geometry exists; removing it
# matches the structure of `all_clean_lines.geojson`
df = df.drop("coordinates", axis=1)

# Wrap the table as a GeoDataFrame (coordinates assumed to be WGS84, EPSG:4326)
gdf = gpd.GeoDataFrame(df, geometry="geometry", crs="EPSG:4326")

# Write the result to disk as GeoJSON and report where it went
gdf.to_file(geojson_path, driver="GeoJSON")
print(f"GeoJSON file has been saved to: {geojson_path}")

GeoJSON file has been saved to: C:\Users\hie\pe_tan\pypsa-earth\data\custom_lines.geojson


GeoJSON to CSV

In [None]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import LineString
import ast  # To safely evaluate the string representation of lists



# Source GeoJSON and destination CSV
geojson_path=r"C:/Users/hie/pe_tan/pypsa-earth/resources/veroni/osm/clean/all_clean_lines.geojson"
csv_path = "C:/Users/hie/pe_tan/pypsa-earth/data/custom_clean_lines.csv"  # outputCSV file


def _coords_or_none(geom):
    """Return the geometry's coordinates as a list, or None for a falsy geometry."""
    if not geom:
        return None
    return list(geom.coords)


# Read the features into a GeoDataFrame
gdf = gpd.read_file(geojson_path)

# Store each geometry's coordinate list in a plain column so it survives the
# export to a flat CSV
gdf["coordinates"] = gdf["geometry"].apply(_coords_or_none)

# The geometry objects themselves are not written to the CSV
df = gdf.drop(columns=["geometry"])

# Write the table without the index (pandas' default ',' separator is used)
df.to_csv(csv_path, index=False)

print(f"CSV file has been saved to: {csv_path}")

CSV file has been saved to: C:/Users/hie/pe_tan/pypsa-earth/data/custom_clean_lines.csv


CSV to NC

In [None]:
import pandas as pd

# File paths
input_csv = r'C:/Users/hie/pe_tan/pypsa-earth/data/ssp2-2.6/2040_base/era5_2013/Africa.csv'
output_csv = r'C:/Users/hie/pe_tan/pypsa-earth/data/ssp2-2.6/2040_base/era5_2013/Africa_converted.csv'

# Strings treated as missing values when reading. pandas' built-in list also
# contains "NA", which would turn the two-letter region code "NA" into NaN on
# load and into an empty field in the output file, so the defaults are replaced
# by this explicit list.
NA_VALUES = ["NULL", "", "N/A", "NAN", "NaN", "nan", "Nan", "n/a", "null"]

# Load the semicolon-separated file; the 'time' column is parsed as dates with
# the day before the month
df = pd.read_csv(
    input_csv,
    sep=';',
    parse_dates=['time'],
    dayfirst=True,
    keep_default_na=False,
    na_values=NA_VALUES,
)

# Rewrite the 'time' column as text in the format YYYY-MM-DD HH:MM:SS
df['time'] = df['time'].dt.strftime('%Y-%m-%d %H:%M:%S')

# Save the updated DataFrame to a new semicolon-separated CSV file
df.to_csv(output_csv, sep=';', index=False)

print(f"Time column has been converted and saved to: {output_csv}")

Time column has been converted and saved to: C:\Users\hie\pe_tan\pypsa-earth\data\ssp2-2.6\2040_base\era5_2013\Africa_converted.csv


In [None]:
import xarray as xr
import pandas as pd

# File paths
demand_profile_csv = r'C:/Users/hie/pe_tan/pypsa-earth/data/ssp2-2.6/2030_base/era5_2013/Africa_converted.csv'
nc_file_path = r'C:/Users/hie/pe_tan/pypsa-earth/data/ssp2-2.6/2030_base/era5_2013/Africa.nc'

# Strings treated as missing values when reading. pandas' built-in list also
# contains "NA", which would turn the two-letter region code "NA" into NaN, so
# the defaults are replaced by this explicit list.
NA_VALUES = ["NULL", "", "N/A", "NAN", "NaN", "nan", "Nan", "n/a", "null"]

# Load the converted demand profile declared above. Reading it here, instead of
# reusing whatever `df` an earlier cell left in the kernel, guarantees that the
# NetCDF file is built from the CSV that sits next to it and lets the cell run
# on a fresh kernel.
# NOTE(review): 'time' is kept as text, exactly as stored in the converted CSV;
# add parse_dates=['time'] if the consumer expects datetime values - confirm.
demand_df = pd.read_csv(
    demand_profile_csv,
    sep=';',
    keep_default_na=False,
    na_values=NA_VALUES,
)

# Convert the DataFrame to an xarray Dataset (the row index becomes the dimension)
dataset = xr.Dataset.from_dataframe(demand_df)

# Save the Dataset to a NetCDF file
dataset.to_netcdf(nc_file_path)

print(f"NetCDF file has been saved to: {nc_file_path}")

NetCDF file has been saved to: C:\Users\hie\pe_tan\pypsa-earth\data\ssp2-2.6\2030_base\era5_2013\Africa.nc


In [12]:
import pandas as pd

# Read the two semicolon-separated demand tables that are to be compared
dataset1 = pd.read_csv("C:/Users/hie/pe_tan/pypsa-earth/data/ssp2-2.6/2030_base/era5_2013/Africa.csv", sep=";")
dataset2 = pd.read_csv("C:/Users/hie/pe_tan/pypsa-earth/data/ssp2-2.6/2030/era5_2018/Africa.csv", sep=";")

# Distinct region codes present in each table
region_codes1, region_codes2 = (
    set(frame["region_code"].unique()) for frame in (dataset1, dataset2)
)

# Codes that appear in one table but not in the other
missing_in_dataset1 = region_codes2.difference(region_codes1)
missing_in_dataset2 = region_codes1.difference(region_codes2)

print("Region codes missing in dataset1:", missing_in_dataset1)
print("Region codes missing in dataset2:", missing_in_dataset2)

Region codes missing in dataset1: set()
Region codes missing in dataset2: set()


In [6]:
import pandas as pd

# Read the semicolon-separated demand table and show which columns it has
file_path = 'C:/Users/hie/pe_tan/pypsa-earth/data/ssp2-2.6/2040_base/era5_2013/Africa.csv'
data = pd.read_csv(file_path, sep=';')
print(data.columns)

# Keep only the rows whose region_code is 'TZ'
is_tz = data['region_code'].eq('TZ')
filtered_data = data.loc[is_tz]

# Write the selection next to the source file, semicolon-separated and without the index
output_file_path = 'C:/Users/hie/pe_tan/pypsa-earth/data/ssp2-2.6/2040_base/era5_2013/filtered_data.csv'
filtered_data.to_csv(output_file_path, sep=';', index=False)

print("Filtered data saved to:", output_file_path)

Index(['region_code', 'time', 'region_name', 'Electricity demand'], dtype='object')
Filtered data saved to: C:/Users/hie/pe_tan/pypsa-earth/data/ssp2-2.6/2040_base/era5_2013/filtered_data.csv
