In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [None]:
# Read the weather observations from the CSV (path is relative to the working directory).
data = pd.read_csv(filepath_or_buffer='weather_data.csv')

In [None]:
# Show the freshly loaded frame as the cell output.
data

In [None]:
# Binary-encode the target: 'Rain' becomes 1, every other value becomes 0.
# Caution: the comparison is against the raw string label, so running this cell
# a second time (when the column already holds 0/1) turns every row into 0.
data['rain_or_not'] = data['rain_or_not'].eq('Rain').astype('int64')

In [None]:
# Check the frame again now that the target column has been encoded.
data

In [None]:
# Work on an independent copy: `data` itself is reused later by the
# time-series section and must not be altered by the steps below.
data_copy = data.copy(deep=True)

In [None]:
# Display the working copy.
data_copy

# Correlation Analysis

In [None]:
# Remove the date column from the working copy (modifies data_copy in place).
data_copy.drop(columns=['date'], inplace=True)


In [None]:
# (rows, columns) before rows with missing values are removed.
data_copy.shape

In [None]:
# Discard every row that has at least one missing value (in place).
data_copy.dropna(axis=0, how='any', inplace=True)

In [None]:
# (rows, columns) after rows with missing values were removed.
data_copy.shape

In [None]:
# Pairwise Pearson correlation between all remaining columns
# (the encoded target is included, so its row shows feature/target correlation).
corr_matrix = data_copy.corr(method='pearson')

corr_matrix

In [None]:
# Scatter plot: humidity (y) against average temperature (x)
plt.scatter(x=data_copy['avg_temperature'], y=data_copy['humidity'])
plt.gca().set(xlabel='Average Temperature', ylabel='Humidity')
plt.title('Average Temperature vs Humidity')

There is a high correlation between 'avg_temperature' and 'humidity' (see the correlation matrix and the scatter plot above).

# Classification Models

In [None]:
# Features are every column except the target; the target is the encoded rain flag.
X = data_copy.drop(columns=['rain_or_not'])
y = data_copy.loc[:, 'rain_or_not']

# 80/20 hold-out split; the fixed seed keeps the partition stable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Shapes of the four pieces, in the order they were unpacked.
tuple(part.shape for part in (X_train, X_test, y_train, y_test))


In [None]:
# Display the training features.
X_train

#### Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Baseline: logistic regression with default settings on the unscaled features.
log_reg = LogisticRegression().fit(X_train, y_train)

y_pred = log_reg.predict(X_test)

# Fraction of test rows whose predicted label equals the true label.
accuracy_score(y_true=y_test, y_pred=y_pred)

#### Decision Tree

In [None]:
# Decision Tree Classifier
from sklearn.tree import DecisionTreeClassifier

# The tree breaks ties between equally good splits at random, so the seed is
# pinned (same value as the train/test split) to make the score repeatable.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)

y_pred = dt.predict(X_test)

# Test-set accuracy of the fitted tree.
accuracy_score(y_test, y_pred)

#### Random Forest

In [None]:
## Random Forest Classifier
from sklearn.ensemble import RandomForestClassifier

# Bootstrap sampling and feature sub-sampling are random; pin the seed (same
# value as the train/test split) so the reported accuracy is reproducible.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)

y_pred = rf.predict(X_test)

# Test-set accuracy of the fitted forest.
accuracy_score(y_test, y_pred)

#### XGBoost

In [None]:
from xgboost import XGBClassifier

# Gradient-boosted trees with library defaults on the unscaled features.
xgb = XGBClassifier()
xgb.fit(X=X_train, y=y_train)

y_pred = xgb.predict(X_test)

# Test-set accuracy of the boosted model.
accuracy_score(y_true=y_test, y_pred=y_pred)

#### Majority Voting

In [None]:
# Majority (hard) voting over logistic regression, random forest and XGBoost.
from sklearn.ensemble import VotingClassifier

voting_clf = VotingClassifier(
    estimators=[
        ('lr', log_reg),
        ('rf', rf),
        ('xgb', xgb),
    ],
    voting='hard',
)
voting_clf.fit(X_train, y_train)

y_pred = voting_clf.predict(X_test)

# Test-set accuracy of the ensemble vote.
accuracy_score(y_true=y_test, y_pred=y_pred)

# Box Plot Analysis

In [None]:
# Box plot of the pressure column (points beyond the whiskers are drawn as outliers)
plt.boxplot(x=data_copy['pressure'])
plt.title(label='Box plot for Pressure')

In [None]:


# Box plot of the cloud cover column, showing its outliers
plt.boxplot(x=data_copy['cloud_cover'])
plt.title(label='Box plot for Cloud Cover')

In [None]:

# Box plot of the average wind speed column
plt.boxplot(x=data_copy['avg_wind_speed'])
plt.title(label='Box plot for Wind Speed')

In [None]:


# Box plot of the humidity column
plt.boxplot(x=data_copy['humidity'])
plt.title(label='Box plot for Humidity')


In [None]:
# Box plot of the average temperature column
plt.boxplot(x=data_copy['avg_temperature'])
plt.title(label='Box plot for Average Temperature')

# Normalizing the features

In [None]:
# Standardise the features to zero mean / unit variance.
from sklearn.preprocessing import StandardScaler

# Fit on the training split only, so no test-set statistics leak into the scaler.
scaler = StandardScaler().fit(X_train)

# Apply the same (training-derived) transformation to both splits.
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)



In [None]:
# Display the scaled training features.
X_train_scaled

In [None]:
# Decision Tree Classifier on the standardised features.
# Seed pinned (same value as the train/test split): the tree breaks ties between
# equally good splits at random, which would otherwise change the score per run.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train_scaled, y_train)

y_pred = dt.predict(X_test_scaled)

# Test-set accuracy of the fitted tree.
accuracy_score(y_test, y_pred)

In [None]:
# Random Forest Classifier on the standardised features.
# Seed pinned (same value as the train/test split) because bootstrap sampling and
# feature sub-sampling are random and would otherwise change the score per run.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train_scaled, y_train)

y_pred = rf.predict(X_test_scaled)

# Test-set accuracy of the fitted forest.
accuracy_score(y_test, y_pred)

In [None]:
# XGBoost Classifier refitted on the standardised features.
xgb = XGBClassifier()
xgb.fit(X=X_train_scaled, y=y_train)

y_pred = xgb.predict(X_test_scaled)

# Test-set accuracy of the boosted model.
accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Multi Layer Perceptron
from sklearn.neural_network import MLPClassifier

# Weight initialisation (and batch shuffling for the default solver) is random;
# pin the seed, matching the train/test split, so the accuracy is reproducible.
# max_iter is raised to 1000 to give the optimiser room to converge.
mlp = MLPClassifier(activation="relu", max_iter=1000, random_state=42)
mlp.fit(X_train_scaled, y_train)

y_pred = mlp.predict(X_test_scaled)

# Test-set accuracy of the fitted network.
accuracy_score(y_test, y_pred)

In [None]:
# Logistic Regression refitted on the standardised features.
log_reg = LogisticRegression().fit(X_train_scaled, y_train)

y_pred = log_reg.predict(X_test_scaled)

# Test-set accuracy of the refitted model.
accuracy_score(y_true=y_test, y_pred=y_pred)

# Hyperparameter Tuning

In [None]:
# Exhaustive 5-fold grid search over the logistic-regression hyperparameters.
from sklearn.model_selection import GridSearchCV

# Regularisation strength, iteration budget and solver are tuned jointly.
param_grid = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],
    'max_iter': [100, 1000, 10000],
    'solver': ['liblinear', 'newton-cholesky', 'sag', 'saga', 'lbfgs', 'newton-cg'],
}
grid_search = GridSearchCV(estimator=log_reg, param_grid=param_grid, cv=5)

grid_search.fit(X_train_scaled, y_train)

# Best parameter combination found by cross-validation.
grid_search.best_params_

In [None]:
# Score the tuned logistic regression on the held-out test split.
y_pred = grid_search.predict(X_test_scaled)

accuracy_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Exhaustive 5-fold grid search over the MLP hyperparameters.
# Note: this rebinds `grid_search`, replacing the logistic-regression search object.
parameter_grid = {
    'activation': ['identity', 'logistic', 'tanh', 'relu'],
    'solver': ['lbfgs', 'sgd', 'adam'],
    'alpha': [0.0001, 0.1, 0.01],
}

grid_search = GridSearchCV(estimator=mlp, param_grid=parameter_grid, cv=5)

grid_search.fit(X_train_scaled, y_train)

# Best parameter combination found by cross-validation.
grid_search.best_params_
                  

In [None]:
# Score the tuned MLP on the held-out test split.
y_pred = grid_search.predict(X_test_scaled)

accuracy_score(y_true=y_test, y_pred=y_pred)

# Time Series Forecasting

In [None]:
# Display the full frame again: the time-series section works on `data`, not on data_copy.
data

In [None]:
# Remove every row with at least one missing value (modifies `data` in place).
data.dropna(axis=0, how='any', inplace=True)

In [None]:
# (rows, columns) after dropping rows with missing values.
data.shape

In [None]:
# The rain label is removed from `data` for the forecasting section (in place).
data.drop(columns=['rain_or_not'], inplace=True)

In [None]:
# Display the frame without the target column.
data

In [None]:
# Positional split: the first 270 rows train the forecaster, the remainder tests it
# (assumes the rows are already in date order — TODO confirm).
# Note: this rebinds X_train / X_test, replacing the classification splits.
X_train = data.iloc[:270]
X_test = data.iloc[270:]
(X_train.shape, X_test.shape)

### Average Temperature

In [None]:
# Build the univariate temperature series used by the forecaster.
# Indexing a frame with `['date', 'avg_temperature']` (no inner list) is read as a
# single tuple key and raises KeyError, and the model needs one numeric series
# rather than a two-column frame, so the date becomes the index instead.
# The dates are kept exactly as they were read from the CSV.
temp_train = X_train.set_index('date')['avg_temperature']
temp_test = X_test.set_index('date')['avg_temperature']

temp_train

In [None]:
from statsmodels.tsa.arima.model import ARIMA

# ARIMA with order (p=4, d=2, q=1), fitted on the training part of the temperature series.
model = ARIMA(endog=temp_train, order=(4, 2, 1))
model_fit = model.fit()

# Text report of the fit produced by statsmodels.
print(model_fit.summary())



In [None]:
# Forecast exactly the hold-out horizon. Positions are zero-based and `end` is
# inclusive: the first out-of-sample step is len(train) and the last test
# observation sits at len(train) + len(test) - 1. (Using len(X_test) alone as
# `end` would place the end before the start.)
start = len(X_train)
end = start + len(X_test) - 1

# `typ` belongs to the legacy ARIMA API and is not passed here; this results
# object is expected to predict on the original scale already
# (NOTE(review): confirm against the installed statsmodels version).
forecast = model_fit.predict(start=start, end=end).rename('ARIMA Predictions')

# Re-label the forecast with the test index so both curves share one x-axis.
# Raises if the forecast and the test series differ in length.
forecast.index = temp_test.index

forecast.plot(legend=True)
temp_test.plot(legend=True)