In [30]:
import pandas as pd
from sklearn.impute import SimpleImputer
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Load the data from a CSV file
df = pd.read_csv('cpcb_dly_aq_tamil_nadu-2014.csv')

# Impute missing values in 'SO2' and 'NO2' with the mean
imputer = SimpleImputer(strategy='mean')
df[['SO2', 'NO2']] = imputer.fit_transform(df[['SO2', 'NO2']])

# Impute missing values in the target variable 'RSPM/PM10' with the mean
target_imputer = SimpleImputer(strategy='mean')
df['RSPM/PM10'] = target_imputer.fit_transform(df[['RSPM/PM10']])

# Convert the 'Sampling Date' column to datetime
df['Sampling Date'] = pd.to_datetime(df['Sampling Date'])

# Split the data into features (X) and target (y)
X = df[['SO2', 'NO2']]
y = df['RSPM/PM10']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and fit the HistGradientBoostingRegressor model
model = HistGradientBoostingRegressor()
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Calculate and print the Mean Squared Error (MSE)
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')


Mean Squared Error: 709.8069612288716


In [22]:
print(df.head())


   Stn Code Sampling Date       State City/Town/Village/Area  \
0        38    2014-01-02  Tamil Nadu                Chennai   
1        38    2014-01-07  Tamil Nadu                Chennai   
2        38    2014-01-21  Tamil Nadu                Chennai   
3        38    2014-01-23  Tamil Nadu                Chennai   
4        38    2014-01-28  Tamil Nadu                Chennai   

                     Location of Monitoring Station  \
0  Kathivakkam, Municipal Kalyana Mandapam, Chennai   
1  Kathivakkam, Municipal Kalyana Mandapam, Chennai   
2  Kathivakkam, Municipal Kalyana Mandapam, Chennai   
3  Kathivakkam, Municipal Kalyana Mandapam, Chennai   
4  Kathivakkam, Municipal Kalyana Mandapam, Chennai   

                                    Agency Type of Location   SO2   NO2  \
0  Tamilnadu State Pollution Control Board  Industrial Area  11.0  17.0   
1  Tamilnadu State Pollution Control Board  Industrial Area  13.0  17.0   
2  Tamilnadu State Pollution Control Board  Industrial Area