##**Data Loading**

In [2]:
from google.colab import files
uploaded = files.upload()

Saving DailyDelhiClimateTrain.csv to DailyDelhiClimateTrain.csv


##**Importing Libraries**

In [28]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import accuracy_score, r2_score, mean_squared_error, mean_absolute_error, classification_report
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

#**Data Preprocessing**

In [29]:
df = pd.read_csv('DailyDelhiClimateTrain.csv')
df

Unnamed: 0,date,meantemp,humidity,wind_speed,meanpressure
0,2013-01-01,10.000000,84.500000,0.000000,1015.666667
1,2013-01-02,7.400000,92.000000,2.980000,1017.800000
2,2013-01-03,7.166667,87.000000,4.633333,1018.666667
3,2013-01-04,8.666667,71.333333,1.233333,1017.166667
4,2013-01-05,6.000000,86.833333,3.700000,1016.500000
...,...,...,...,...,...
1457,2016-12-28,17.217391,68.043478,3.547826,1015.565217
1458,2016-12-29,15.238095,87.857143,6.000000,1016.904762
1459,2016-12-30,14.095238,89.666667,6.266667,1017.904762
1460,2016-12-31,15.052632,87.000000,7.325000,1016.100000


In [30]:
df.isnull().sum()

Unnamed: 0,0
date,0
meantemp,0
humidity,0
wind_speed,0
meanpressure,0


In [31]:
df.describe()

Unnamed: 0,meantemp,humidity,wind_speed,meanpressure
count,1462.0,1462.0,1462.0,1462.0
mean,25.495521,60.771702,6.802209,1011.104548
std,7.348103,16.769652,4.561602,180.231668
min,6.0,13.428571,0.0,-3.041667
25%,18.857143,50.375,3.475,1001.580357
50%,27.714286,62.625,6.221667,1008.563492
75%,31.305804,72.21875,9.238235,1014.944901
max,38.714286,100.0,42.22,7679.333333


In [32]:
df = df.round(2)
df

Unnamed: 0,date,meantemp,humidity,wind_speed,meanpressure
0,2013-01-01,10.00,84.50,0.00,1015.67
1,2013-01-02,7.40,92.00,2.98,1017.80
2,2013-01-03,7.17,87.00,4.63,1018.67
3,2013-01-04,8.67,71.33,1.23,1017.17
4,2013-01-05,6.00,86.83,3.70,1016.50
...,...,...,...,...,...
1457,2016-12-28,17.22,68.04,3.55,1015.57
1458,2016-12-29,15.24,87.86,6.00,1016.90
1459,2016-12-30,14.10,89.67,6.27,1017.90
1460,2016-12-31,15.05,87.00,7.32,1016.10


In [52]:
df1 = df.drop('date', axis=1)

In [58]:
X = df1.drop('humidity', axis=1)
y = df1['humidity']
X

Unnamed: 0,meantemp,wind_speed,meanpressure
0,10.00,0.00,1015.67
1,7.40,2.98,1017.80
2,7.17,4.63,1018.67
3,8.67,1.23,1017.17
4,6.00,3.70,1016.50
...,...,...,...
1457,17.22,3.55,1015.57
1458,15.24,6.00,1016.90
1459,14.10,6.27,1017.90
1460,15.05,7.32,1016.10


##**Model Selection and Training**

In [61]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [64]:
model = LinearRegression()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

##**Metric Performance**

In [65]:
r_squared = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print(f"R_squared: {r_squared}")
print(f"MSE: {mse}")
print(f"MAE: {mae}")


R_squared: 0.3744993599869819
MSE: 163.8653514200505
MAE: 10.219305188460659


##**Predicting Sample Data**

In [66]:
import numpy as np
sample_data = pd.DataFrame({
    'meantemp': [15.00],
    'wind_speed': [10.00],
    'meanpressure': [1000.00]
})

prediction = model.predict(sample_data)
print(f"Prediction: {prediction}")

Prediction: [70.37240067]


##**Feature Importance**

In [67]:
importance = model.coef_
feature_names = X.columns
for feature, importance_value in zip(feature_names, importance):
    print(f"{feature}: {importance_value}")

meantemp: -1.1698537170161227
wind_speed: -0.7803131818520922
meanpressure: -0.001782247108476187
