In [92]:
import pandas as pd
from datetime import datetime
from sklearn.preprocessing import LabelEncoder
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPRegressor
from sklearn.multioutput import MultiOutputRegressor

In [94]:
#clean train data set

In [96]:
data = pd.read_csv('train.csv')

In [98]:
# Step 1: Keep only the relevant columns
relevant_columns = ['kingdom', 'Year', 'Month', 'Day', 'Avg_Temperature', 
                    'Radiation', 'Rain_Amount', 'Wind_Speed', 'Wind_Direction']
data = data[relevant_columns]

In [100]:
data.isnull().sum()

kingdom            0
Year               0
Month              0
Day                0
Avg_Temperature    0
Radiation          0
Rain_Amount        0
Wind_Speed         0
Wind_Direction     0
dtype: int64

In [102]:
data.dtypes

kingdom             object
Year                 int64
Month                int64
Day                  int64
Avg_Temperature    float64
Radiation          float64
Rain_Amount        float64
Wind_Speed         float64
Wind_Direction       int64
dtype: object

In [104]:
# Step 2: Convert temperatures from Kelvin to Celsius where applicable
def convert_temp(temp):
    try:
        temp = float(temp)  # Ensure numeric
        if temp > 200:  # Assuming Kelvin if > 200
            return temp - 273.15
        return temp
    except (ValueError, TypeError):
        return None  # Return None for non-numeric values

data['Avg_Temperature'] = data['Avg_Temperature'].apply(convert_temp)

In [106]:
data.head()

Unnamed: 0,kingdom,Year,Month,Day,Avg_Temperature,Radiation,Rain_Amount,Wind_Speed,Wind_Direction
0,Arcadia,1,4,1,25.5,22.52,58.89,8.6,283
1,Atlantis,1,4,1,26.5,22.73,11.83,15.8,161
2,Avalon,1,4,1,26.3,22.73,11.83,15.8,161
3,Camelot,1,4,1,24.0,22.67,75.27,6.4,346
4,Dorne,1,4,1,28.0,22.35,4.81,16.7,185


In [108]:
# Encode categorical feature 'kingdom'
encoder = LabelEncoder()
data["kingdom"] = encoder.fit_transform(data["kingdom"])

In [110]:
data.head()

Unnamed: 0,kingdom,Year,Month,Day,Avg_Temperature,Radiation,Rain_Amount,Wind_Speed,Wind_Direction
0,0,1,4,1,25.5,22.52,58.89,8.6,283
1,1,1,4,1,26.5,22.73,11.83,15.8,161
2,2,1,4,1,26.3,22.73,11.83,15.8,161
3,3,1,4,1,24.0,22.67,75.27,6.4,346
4,4,1,4,1,28.0,22.35,4.81,16.7,185


In [112]:
data.dtypes

kingdom              int32
Year                 int64
Month                int64
Day                  int64
Avg_Temperature    float64
Radiation          float64
Rain_Amount        float64
Wind_Speed         float64
Wind_Direction       int64
dtype: object

In [114]:
#since 2012 leap and given data set 2 year is leap we add 2010 to every year
data['Year'] = data['Year'] + 2010  

In [116]:
data.head()

Unnamed: 0,kingdom,Year,Month,Day,Avg_Temperature,Radiation,Rain_Amount,Wind_Speed,Wind_Direction
0,0,2011,4,1,25.5,22.52,58.89,8.6,283
1,1,2011,4,1,26.5,22.73,11.83,15.8,161
2,2,2011,4,1,26.3,22.73,11.83,15.8,161
3,3,2011,4,1,24.0,22.67,75.27,6.4,346
4,4,2011,4,1,28.0,22.35,4.81,16.7,185


In [118]:
data["date"]=pd.to_datetime(data[['Year', 'Month', 'Day']])

In [120]:
data = data.drop(columns=['Year', 'Month', 'Day'])

In [122]:
data.head()

Unnamed: 0,kingdom,Avg_Temperature,Radiation,Rain_Amount,Wind_Speed,Wind_Direction,date
0,0,25.5,22.52,58.89,8.6,283,2011-04-01
1,1,26.5,22.73,11.83,15.8,161,2011-04-01
2,2,26.3,22.73,11.83,15.8,161,2011-04-01
3,3,24.0,22.67,75.27,6.4,346,2011-04-01
4,4,28.0,22.35,4.81,16.7,185,2011-04-01


In [124]:
data.to_csv("clean_data_set.csv", index=False)

In [126]:
# make test Data set

In [128]:
tdata = pd.read_csv('test.csv') 

In [130]:
tdata['Year'] = tdata['Year'] + 2010

In [132]:
tdata.head()

Unnamed: 0,ID,Year,Month,Day,kingdom
0,84961,2019,1,1,Arcadia
1,84962,2019,1,1,Atlantis
2,84963,2019,1,1,Avalon
3,84964,2019,1,1,Camelot
4,84965,2019,1,1,Dorne


In [134]:
tdata["date"]=pd.to_datetime(tdata[['Year', 'Month', 'Day']])

In [136]:
tdata.head()

Unnamed: 0,ID,Year,Month,Day,kingdom,date
0,84961,2019,1,1,Arcadia,2019-01-01
1,84962,2019,1,1,Atlantis,2019-01-01
2,84963,2019,1,1,Avalon,2019-01-01
3,84964,2019,1,1,Camelot,2019-01-01
4,84965,2019,1,1,Dorne,2019-01-01


In [138]:
tdata["kingdom"] = encoder.fit_transform(tdata["kingdom"])

In [140]:
tdata.head()

Unnamed: 0,ID,Year,Month,Day,kingdom,date
0,84961,2019,1,1,0,2019-01-01
1,84962,2019,1,1,1,2019-01-01
2,84963,2019,1,1,2,2019-01-01
3,84964,2019,1,1,3,2019-01-01
4,84965,2019,1,1,4,2019-01-01


In [142]:
tdata = tdata.drop(columns=['Year', 'Month', 'Day'])

In [144]:
tdata.head()

Unnamed: 0,ID,kingdom,date
0,84961,0,2019-01-01
1,84962,1,2019-01-01
2,84963,2,2019-01-01
3,84964,3,2019-01-01
4,84965,4,2019-01-01


In [146]:
tdata.to_csv("test_data_set.csv", index=False)

In [148]:
#train a model

In [150]:
# Load dataset
data = pd.read_csv('clean_data_set.csv', parse_dates=['date'])

In [152]:
# Extract features from 'date'
data['year'] = data['date'].dt.year
data['month'] = data['date'].dt.month
data['day'] = data['date'].dt.day

In [154]:
# Define features (X) and targets (y)
X = data[['year', 'month', 'day', 'kingdom']]
y = data[['Avg_Temperature', 'Radiation', 'Rain_Amount', 'Wind_Speed', 'Wind_Direction']]

In [156]:
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [158]:
mlp = MLPRegressor(hidden_layer_sizes=(100, 50), activation='relu', solver='adam', max_iter=500)
model = MultiOutputRegressor(mlp)
model.fit(X_train, y_train)

In [173]:
# Evaluate the model
def smape(y_true, y_pred):
    numerator = np.abs(y_true - y_pred)
    denominator = (np.abs(y_true) + np.abs(y_pred)) / 2
    # Handle division by zero when both y_true and y_pred are zero
    ratio = np.divide(numerator, denominator, out=np.zeros_like(numerator), where=denominator != 0)
    return 100 * np.mean(ratio)

In [175]:
y_pred = model.predict(X_test)

In [177]:
error = smape(y_test, y_pred)
print(f"sMAPE Error: {error:.2f}%")

sMAPE Error: 38.98%


In [179]:
#make predictions

In [181]:
tdata = pd.read_csv('test_data_set.csv',parse_dates=['date'])

In [183]:
tdata.head()

Unnamed: 0,ID,kingdom,date
0,84961,0,2019-01-01
1,84962,1,2019-01-01
2,84963,2,2019-01-01
3,84964,3,2019-01-01
4,84965,4,2019-01-01


In [185]:
tdata['year'] = tdata['date'].dt.year
tdata['month'] = tdata['date'].dt.month
tdata['day'] = tdata['date'].dt.day

tdata = tdata[['ID','year', 'month','day','kingdom']]

In [187]:
tdata.dtypes

ID         int64
year       int32
month      int32
day        int32
kingdom    int64
dtype: object

In [189]:
prediction = model.predict(tdata[['year', 'month','day','kingdom']])

In [191]:
# Convert predictions to a DataFrame for CSV export
prediction_columns = ['Avg_Temperature', 'Radiation', 'Rain_Amount', 'Wind_Speed', 'Wind_Direction']
predictions_df = pd.DataFrame(prediction, columns=prediction_columns)

In [193]:
#add id column
predictions_df['ID']=tdata['ID']

In [195]:
# Reorder columns with 'ID' as the first column
predictions_df = predictions_df[['ID', 'Avg_Temperature', 'Radiation', 'Rain_Amount', 'Wind_Speed', 'Wind_Direction']]

In [197]:
predictions_df.head()

Unnamed: 0,ID,Avg_Temperature,Radiation,Rain_Amount,Wind_Speed,Wind_Direction
0,84961,26.148573,21.229376,0.140832,14.030961,85.886361
1,84962,26.153751,21.235485,0.128651,13.904771,86.599322
2,84963,26.158929,21.241594,0.11647,13.778581,87.312284
3,84964,26.164107,21.247703,0.104288,13.652391,88.025245
4,84965,26.169355,21.253812,0.092107,13.526201,88.20933


In [199]:
# Save to CSV
predictions_df.to_csv('final_submittion.csv', index=False)