# Create data files 
Takes the data from the `station_means.txt` and combines it with the `"Stations info".xlsx`to create to set of files. 
### A. Mapping
- MAPPING : File | Area | BFI | Mean Elevation | Datum[East, North] 

### B. Soil information
- SOIL IN : File | Name | BFI  | Landuse[Arti, agri, forest, wetl, water]\(%\) | Soil[1, 2, 3, 4, 5]\(%\)

In [1]:
import pandas as pd 
import os 
import sys 
import numpy as np 
from typing import List, Dict

In [2]:
columns_mapping = [
    "File",
    "Area",
    "BFI",
    "Elevation mean",
    "Datum",
    "East",
    "North"
]


# S : Soil infiltration capacity
columns_soilin = [
    "File",
    "Name",
    "BFI",
    "Artificial",
    "Agriculture",
    "Wetland",
    "Waterbody",
    "S1",          # SL_IFT1: Well suited (%)
    "S2",          # SL_IFT2: Medium suited (%)
    "S3",          # SL_IFT3: Little suited (%)
    "S4",          # SL_IFT4: Unsuitable (%)
    "S5"           # SL_IFT5 (Not classified%)
]

In [3]:
def mapping_to_file(finalpath: str, df: pd.DataFrame) -> None:
    filetype = ".txt"
    datums = ["UTM32", "UTM33", "UTM34", "UTM35", "UTM36"]
    for datum in datums:
        utm = df[df["Datum"] == datum]
        utm.to_csv(finalpath + datum + filetype, index=False)

In [4]:
def soilin_to_file(finalpath: str, df: pd.DataFrame) -> None:
    filetype = ".csv"
    name = "SoilInfitrationCapacity"
    df.to_csv(finalpath + name + filetype, index=False)

In [5]:
def extract_columns(col : List[str], df) -> Dict[str, pd.Series]:
    d = {}
    for key in col:
        if key in df.columns:
            d[key] = df[key]
            return d                        


In [29]:
df_sm : pd.DataFrame # Station means dataframe
path_sm : str = "../GEO3000/code/data/lh_0.925_final/station_means.txt"
df_sm = pd.read_csv(path_sm, delim_whitespace=True)
    
    
df_si : pd.DataFrame # station info
path_si : str = "./data/Stations info.xlsx"
df_si = pd.read_excel(path_si, header=1, dtype={"station": str})

In [30]:
# display(df_sm)
df_sm.drop(["Period", "Completness"], axis=1, inplace=True, errors="ignore")
df_sm["File"] = df_sm["File"].apply(lambda x: x[:-2])

In [40]:
df_1 = df_sm.copy()
df_1.set_index("File", inplace=True)

In [41]:
# df_si.drop(["elev_mean"], axis=1, inplace=True, errors="ignore")
# display(df_si) 

In [42]:
stations_with_info = df_si["station"] # Creates a series of all the stations of interest. 
stations_total = df_sm["File"]

In [43]:
df_1["Elevation mean"] = np.nan

Unnamed: 0_level_0,Area,BFI,Datum,East,North,Elevation mean
File,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2.634,183.6,0.472136,UTM32,271220,6733473,
12.209,554.1,0.523131,UTM32,219200,6724518,
105.1,137.6,0.515502,UTM32,129534,6983503,
88.4,234.9,0.644546,UTM32,74031,6885087,
2.28,869.8,0.492369,UTM32,246027,6795979,
...,...,...,...,...,...,...
22.22,203.6,0.436342,UTM32,78051,6461281,
83.2,508.1,0.633612,UTM32,13361,6833950,
2.145,11205.4,0.681366,UTM32,247417,6809063,
247.3,128.9,0.526866,UTM36,1091325,7802944,


In [44]:
df_2 = df_si.copy()
df_2.set_index("station", inplace=True)
for s in stations_with_info:
    if s in stations_total.values:
        df_1["Elevation mean"].loc[s] = df_2["elev_mean"].loc[s]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


Unnamed: 0_level_0,Area,BFI,Datum,East,North,Elevation mean
File,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2.634,183.6,0.472136,UTM32,271220,6733473,453.0
12.209,554.1,0.523131,UTM32,219200,6724518,806.0
105.1,137.6,0.515502,UTM32,129534,6983503,252.0
88.4,234.9,0.644546,UTM32,74031,6885087,
2.28,869.8,0.492369,UTM32,246027,6795979,
...,...,...,...,...,...,...
22.22,203.6,0.436342,UTM32,78051,6461281,196.0
83.2,508.1,0.633612,UTM32,13361,6833950,813.0
2.145,11205.4,0.681366,UTM32,247417,6809063,
247.3,128.9,0.526866,UTM36,1091325,7802944,


In [46]:
df_1.reset_index(inplace=True)
df_1 = df_1[columns_mapping]

Unnamed: 0,File,Area,BFI,Elevation mean,Datum,East,North
0,2.634,183.6,0.472136,453.0,UTM32,271220,6733473
1,12.209,554.1,0.523131,806.0,UTM32,219200,6724518
2,105.1,137.6,0.515502,252.0,UTM32,129534,6983503
3,88.4,234.9,0.644546,,UTM32,74031,6885087
4,2.28,869.8,0.492369,,UTM32,246027,6795979
...,...,...,...,...,...,...,...
95,22.22,203.6,0.436342,196.0,UTM32,78051,6461281
96,83.2,508.1,0.633612,813.0,UTM32,13361,6833950
97,2.145,11205.4,0.681366,,UTM32,247417,6809063
98,247.3,128.9,0.526866,,UTM36,1091325,7802944


In [49]:
finalpath_mapping: str = "../GEO3000/code/data/map/"
mapping_to_file(finalpath_mapping, df_1)

In [47]:
print(columns_mapping)
print(columns_soilin)

['File', 'Area', 'BFI', 'Elevation mean', 'Datum', 'East', 'North']
['File', 'Name', 'BFI', 'Artificial', 'Agriculture', 'Wetland', 'Waterbody', 'S1', 'S2', 'S3', 'S4', 'S5']
