In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:

# Read the weather CSV from the working directory into `df` and print the
# first rows as a quick sanity check of the parsed columns.
weather_csv = 'london_weather.csv'
df = pd.read_csv(weather_csv)

print(df.head())

In [None]:
# Per-column count of missing values (`isna` is the alias of `isnull`).
print(df.isna().sum())

In [None]:

# Rows without a target value cannot be used for supervised training or
# scoring; mean-filling `mean_temp` would fabricate labels, so drop them.
df = df.dropna(subset=['mean_temp'])

# Impute the remaining gaps with per-column means. `numeric_only=True` keeps
# this working if the frame holds a non-numeric column (pandas >= 2.0 raises
# a TypeError otherwise). Rebinding `df` instead of using `inplace=True`
# makes the cell safe to re-run.
# NOTE(review): the means are computed over all rows, including those that
# later land in the test split -- confirm this mild leakage is acceptable.
df = df.fillna(df.mean(numeric_only=True))

# Features are every column except the target and the `date` column.
X = df.drop(['mean_temp', 'date'], axis=1)
y = df['mean_temp']


In [None]:


# Instantiate the three regressors that will be compared; the two ensemble
# models receive a fixed random_state so repeated runs match.
linear_model = LinearRegression()
rf_model = RandomForestRegressor(random_state=42, n_estimators=100)
xgb_model = XGBRegressor(random_state=42, n_estimators=100)

# Hold out 20% of the rows for evaluation, with a fixed random_state.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:

# Train each model on the training split and predict the held-out rows.
# `fit` returns the estimator itself, so fitting and predicting are chained;
# the models are independent, so the interleaved order does not affect results.
y_pred_linear = linear_model.fit(X_train, y_train).predict(X_test)
y_pred_rf = rf_model.fit(X_train, y_train).predict(X_test)
y_pred_xgb = xgb_model.fit(X_train, y_train).predict(X_test)


In [None]:

# Score every model's test-set predictions with mean squared error and R^2.
mse_linear, r2_linear = (
    mean_squared_error(y_test, y_pred_linear),
    r2_score(y_test, y_pred_linear),
)
mse_rf, r2_rf = (
    mean_squared_error(y_test, y_pred_rf),
    r2_score(y_test, y_pred_rf),
)
mse_xgb, r2_xgb = (
    mean_squared_error(y_test, y_pred_xgb),
    r2_score(y_test, y_pred_xgb),
)

# Report the metrics, one line per model.
print(f"Linear Regression - MSE: {mse_linear}, R2: {r2_linear}")
print(f"Random Forest - MSE: {mse_rf}, R2: {r2_rf}")
print(f"XGBoost - MSE: {mse_xgb}, R2: {r2_xgb}")



In [None]:

# Actual-vs-predicted scatter, one panel per model, each with a dashed red
# identity line spanning the range of the actual values.
plt.figure(figsize=(18, 6))

# The identity line uses the same end points in every panel; compute them once.
actual_lo, actual_hi = min(y_test), max(y_test)

scatter_panels = [
    ('Linear Regression: Actual vs Predicted', y_pred_linear),
    ('Random Forest: Actual vs Predicted', y_pred_rf),
    ('XGBoost: Actual vs Predicted', y_pred_xgb),
]
for position, (panel_title, predictions) in enumerate(scatter_panels, start=1):
    ax = plt.subplot(1, 3, position)
    ax.scatter(y_test, predictions, alpha=0.5)
    ax.plot(
        [actual_lo, actual_hi],
        [actual_lo, actual_hi],
        color='red',
        linestyle='--',
    )
    ax.set_title(panel_title)
    ax.set_xlabel('Actual')
    ax.set_ylabel('Predicted')

plt.tight_layout()
plt.show()



In [None]:

# Residual diagnostics, one panel per model.
#
# `sns.residplot(x, y)` fits its own regression of y on x and plots the
# residuals of *that* fit, so passing (actual, predicted) does not show the
# model's errors. The residuals are therefore computed explicitly as
# actual - predicted and drawn with `sns.regplot`, which keeps the red LOWESS
# trend line (LOWESS in seaborn relies on statsmodels being installed, as the
# previous residplot call did).
plt.figure(figsize=(18, 6))

# Plain arrays avoid any index alignment between the Series and the
# prediction arrays.
actual_values = np.asarray(y_test)

residual_panels = [
    ('Linear Regression Residuals', y_pred_linear),
    ('Random Forest Residuals', y_pred_rf),
    ('XGBoost Residuals', y_pred_xgb),
]
for position, (panel_title, predictions) in enumerate(residual_panels, start=1):
    ax = plt.subplot(1, 3, position)
    model_residuals = actual_values - np.asarray(predictions)
    sns.regplot(
        x=actual_values,
        y=model_residuals,
        lowess=True,
        line_kws={'color': 'red', 'lw': 1},
        ax=ax,
    )
    # Zero-error reference line: points above it are under-predictions.
    ax.axhline(0, color='.2', linestyle=':')
    ax.set_title(panel_title)
    ax.set_xlabel('Actual')
    ax.set_ylabel('Residuals')

plt.tight_layout()
plt.show()
