# MSE Model Training

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
df=pd.read_csv("Cleaned1.csv")
df.head()

Unnamed: 0,Country,Balance,Product,Value,Month,Date
0,Australia,Net Electricity Production,Electricity,23130.2764,Mar,24
1,Australia,Net Electricity Production,Total Combustible Fuels,14353.8714,Mar,24
2,Australia,Net Electricity Production,"Coal, Peat and Manufactured Gases",10304.7825,Mar,24
3,Australia,Net Electricity Production,Oil and Petroleum Products,330.5351,Mar,24
4,Australia,Net Electricity Production,Natural Gas,3492.4621,Mar,24


In [3]:
from sklearn.preprocessing import LabelEncoder
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor

In [4]:
country_encoder = LabelEncoder().fit(df['Country'])
balance_encoder = LabelEncoder().fit(df['Balance'])
product_encoder = LabelEncoder().fit(df['Product'])
month_encoder = LabelEncoder().fit(df['Month'])

In [5]:
# Transform df
df['Country'] = country_encoder.transform(df['Country'])
df['Balance'] = balance_encoder.transform(df['Balance'])
df['Product'] = product_encoder.transform(df['Product'])
df['Month'] = month_encoder.transform(df['Month'])

In [6]:
# # Save the encoders for future use
joblib.dump(country_encoder, 'country_encoder.pkl')
joblib.dump(balance_encoder, 'balance_encoder.pkl')
joblib.dump(product_encoder, 'product_encoder.pkl')
joblib.dump(month_encoder, 'month_encoder.pkl')

['month_encoder.pkl']

## SPLIT THE DATA FRAME 

In [7]:
df

Unnamed: 0,Country,Balance,Product,Value,Month,Date
0,1,2,2,23130.2764,7,24
1,1,2,12,14353.8714,7,24
2,1,2,0,10304.7825,7,24
3,1,2,8,330.5351,7,24
4,1,2,5,3492.4621,7,24
...,...,...,...,...,...,...
140480,19,4,2,34846.5580,4,10
140481,19,3,2,32372.5830,4,10
140482,19,5,2,6869.2530,4,10
140483,19,0,2,62000.7330,4,10


In [8]:
# split dataframe...
df1 = df[df['Balance'] == 2]

In [9]:
df2 = df[df['Balance'] != 2]

In [10]:
dff1 = df1 # Copying dataframe which having 'Balance' == 'Net Electricity Production'
dff2 = df2 # Copying dataframe which having 'Balance' != 'Net Electricity Production'

# For self security  (-;

In [11]:
# Model training for df1
x = dff1.drop(columns=['Value'], axis=1)
y = dff1['Value']
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=3)

# Standard Scaler Transform
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)

model1 = RandomForestRegressor(n_estimators=100)
model1.fit(x_train, y_train)
joblib.dump(model1, 'Model_1.pkl')

['Model_1.pkl']

In [12]:
# Model training for df2
p = dff2.drop(columns=['Value'], axis=1)
q = dff2['Value']
p_train, p_test, q_train, q_test = train_test_split(p, q, test_size=0.3, random_state=3)

# Standard Scaler Transform
p_train = sc.fit_transform(p_train)
p_test = sc.transform(p_test)

model2 = RandomForestRegressor(n_estimators=100)
model2.fit(p_train, q_train)
joblib.dump(model2, 'Model_2.pkl')

['Model_2.pkl']

## Taking Example

In [16]:
# Prediction Example
# Example input
input_data = [['Austria', 'Total Exports', 'Natural Gas', 'Mar', 24]]

# Load the encoders
country_encoder = joblib.load('country_encoder.pkl')
balance_encoder = joblib.load('balance_encoder.pkl')
product_encoder = joblib.load('product_encoder.pkl')
month_encoder = joblib.load('month_encoder.pkl')

# Create a DataFrame for the input data
input_df = pd.DataFrame(input_data, columns=['Country', 'Balance', 'Product', 'Month', 'Data'])

# Encode the input data
input_df['Country'] = country_encoder.transform(input_df['Country'])
input_df['Balance'] = balance_encoder.transform(input_df['Balance'])
input_df['Product'] = product_encoder.transform(input_df['Product'])
input_df['Month'] = month_encoder.transform(input_df['Month'])

# Extract the features
input_features = input_df[['Country', 'Balance', 'Product', 'Month', 'Data']].values

# Load the trained model
model = joblib.load('model_1.pkl')  # Use model_2.pkl if appropriate

# Make a prediction
predicted_price = model.predict(input_features)
print(f"The predicted price is: {predicted_price[0]}")

The predicted price is: 29471.52699700001


In [17]:
input_data = [['Austria', 'Net Electricity Production', 'Natural Gas', 'Mar', 24]]

# Load the encoders
country_encoder = joblib.load('country_encoder.pkl')
balance_encoder = joblib.load('balance_encoder.pkl')
product_encoder = joblib.load('product_encoder.pkl')
month_encoder = joblib.load('month_encoder.pkl')

# Create a DataFrame for the input data
input_df = pd.DataFrame(input_data, columns=['Country', 'Balance', 'Product', 'Month', 'Data'])

# Encode the input data
input_df['Country'] = country_encoder.transform(input_df['Country'])
input_df['Balance'] = balance_encoder.transform(input_df['Balance'])
input_df['Product'] = product_encoder.transform(input_df['Product'])
input_df['Month'] = month_encoder.transform(input_df['Month'])

# Extract the features
input_features = input_df[['Country', 'Balance', 'Product', 'Month', 'Data']].values

# Load the trained model
model = joblib.load('model_2.pkl')  # Use model_2.pkl if appropriate

# Make a prediction
predicted_price = model.predict(input_features)
print(f"The predicted price is: {predicted_price[0]}")

The predicted price is: 2730.3066419999996
