In [1]:
import pandas as pd

# Locations of the two input CSV files (relative to the notebook's working directory).
hydro_data_path = 'hydro - data.csv'
water_quality_parameters_path = 'hydro - water_quality_parameters.csv'

# Load the hydro measurements, then the water quality parameter table.
hydro_data_df = pd.read_csv(hydro_data_path)
water_quality_parameters_df = pd.read_csv(water_quality_parameters_path)

# Preview the top rows of both frames as the cell's output.
hydro_data_df.head(), water_quality_parameters_df.head()


(   Year     Month DO (mg/L)    pH Conductivity (uS/cm)  BOD Nitrates  \
 0  2015   January       7.3  7.97                  821  0.6     0.42   
 1  2015  Febraury       7.1  7.54                  892  0.7     0.52   
 2  2015     March       7.2  7.32                  912  1.4     0.58   
 3  2015     April       7.8  7.92                  943  1.8     0.62   
 4  2015       May       7.5  7.28                  896  1.8     0.56   
 
   Turbidity  TDS Fluoride  
 0         5  535     0.25  
 1         6  580     0.02  
 2         5  757     0.91  
 3         8  592      1.4  
 4         6  542     1.28  ,
   water quality parametrs  measured values  ideal values  standard values
 0            conductivity            350.0             0            300.0
 1                     bod              4.0             8              6.0
 2                     tds            334.0             0            500.0
 3                nitrades             47.0             0             45.0
 4        

In [2]:
# Cast every measurement column to a numeric dtype. Values that cannot be
# parsed as numbers are turned into NaN so they can be handled afterwards.
columns_to_convert = [
    'DO (mg/L)', 'pH', 'Conductivity (uS/cm)', 'BOD',
    'Nitrates', 'Turbidity', 'TDS', 'Fluoride',
]
hydro_data_df[columns_to_convert] = hydro_data_df[columns_to_convert].apply(
    lambda column: pd.to_numeric(column, errors='coerce')
)

# Per-column count of missing entries after coercion.
missing_values = hydro_data_df.isna().sum()
missing_values


Year                    0
Month                   0
DO (mg/L)               5
pH                      5
Conductivity (uS/cm)    5
BOD                     5
Nitrates                5
Turbidity               5
TDS                     5
Fluoride                5
dtype: int64

In [3]:
# Fill the NaNs in the measurement columns with each column's mean.
from sklearn.impute import SimpleImputer

# Mean-strategy imputer: learn the column means first, then apply them.
imputer = SimpleImputer(strategy='mean')
imputer.fit(hydro_data_df[columns_to_convert])
hydro_data_df[columns_to_convert] = imputer.transform(hydro_data_df[columns_to_convert])

# Confirm that no missing values remain in any column.
hydro_data_df.isna().sum()


Year                    0
Month                   0
DO (mg/L)               0
pH                      0
Conductivity (uS/cm)    0
BOD                     0
Nitrates                0
Turbidity               0
TDS                     0
Fluoride                0
dtype: int64

In [4]:

# Missing-value counts and column dtypes of the parameter table, shown together.
water_quality_parameters_df.isna().sum(), water_quality_parameters_df.dtypes


(water quality parametrs    0
 measured values            0
 ideal values               0
 standard values            0
 dtype: int64,
 water quality parametrs     object
 measured values            float64
 ideal values                 int64
 standard values            float64
 dtype: object)

In [5]:

# Output file names for the two tables.
preprocessed_hydro_data_path = 'preprocessed_hydro_data.csv'
preprocessed_water_quality_parameters_path = 'preprocessed_water_quality_parameters.csv'

# Write each frame to CSV without the index column.
hydro_data_df.to_csv(preprocessed_hydro_data_path, index=False)
water_quality_parameters_df.to_csv(preprocessed_water_quality_parameters_path, index=False)

# Echo the written paths as the cell's output.
preprocessed_hydro_data_path, preprocessed_water_quality_parameters_path


('preprocessed_hydro_data.csv', 'preprocessed_water_quality_parameters.csv')

In [2]:
import pandas as pd
# Reference table: one ideal value per water quality parameter.
water_quality_params = dict([
    (
        'water quality parameters',
        ['conductivity', 'bod', 'tds', 'nitrates', 'fluorides', 'iron', 'ph', 'turbidity'],
    ),
    ('ideal values', [0, 8, 0, 0, 0, 0, 7, 0]),
])

# Hypothetical measurements (three samples per parameter), used only to
# illustrate the calculation.
wqi_data = dict(
    conductivity=[350, 400, 380],
    bod=[4, 5, 3],
    tds=[334, 340, 320],
    nitrates=[47, 50, 45],
    fluorides=[10.2, 9.8, 10.1],
    iron=[0.2, 0.1, 0.15],
    ph=[6.2, 6.4, 6.3],
    turbidity=[2, 3, 2.5],
)

# Build one DataFrame from each dictionary.
df_quality_params = pd.DataFrame.from_dict(water_quality_params)
df_wqi = pd.DataFrame.from_dict(wqi_data)

# Function to calculate WQI
def calculate_WQI(df_wqi, df_quality_params,
                  param_col='water quality parameters',
                  ideal_col='ideal values'):
    """Sum, over all listed parameters, of (mean measured value - ideal value).

    NOTE: this is a simplified, *unweighted* deviation sum. It does not apply
    per-parameter weights or standard (permissible) values, so parameters
    measured on large scales dominate the result.

    Parameters
    ----------
    df_wqi : pandas.DataFrame
        Measurements, one column per parameter; each column is averaged
        with ``Series.mean()``.
    df_quality_params : pandas.DataFrame
        Reference table with one row per parameter.
    param_col : str, default 'water quality parameters'
        Column of ``df_quality_params`` holding the parameter names. Each
        name must also be a column of ``df_wqi``.
    ideal_col : str, default 'ideal values'
        Column of ``df_quality_params`` holding the ideal value per parameter.

    Returns
    -------
    number
        The accumulated deviation; ``0`` when ``df_quality_params`` has no rows.

    Raises
    ------
    KeyError
        If ``param_col`` / ``ideal_col`` is not in ``df_quality_params``, or if
        any listed parameter has no matching column in ``df_wqi`` (all missing
        names are reported together).
    """
    param_names = df_quality_params[param_col]
    ideal_values = df_quality_params[ideal_col]

    # Fail early with the full list of problems instead of stopping at the
    # first unknown parameter halfway through the accumulation.
    missing = [name for name in param_names if name not in df_wqi.columns]
    if missing:
        raise KeyError(f"df_wqi has no measurement column(s) for: {missing}")

    # Accumulate sequentially, in row order, so floating-point results are
    # the same as a plain running total.
    WQI = 0
    for name, ideal_value in zip(param_names, ideal_values):
        WQI += df_wqi[name].mean() - ideal_value
    return WQI

# Evaluate the index for the sample measurements and show it as the cell output.
WQI_value = calculate_WQI(df_wqi=df_wqi, df_quality_params=df_quality_params)
WQI_value


763.3166666666666