In [120]:
# %matplotlib Qt
%matplotlib inline

In [121]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import jdatetime

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error

from modules import LLS
from train_test_split import my_train_test_split

In [122]:
data = pd.read_csv('data/Dollar_Rial_Price_Dataset.csv')
data.head()

Unnamed: 0.1,Unnamed: 0,Date,Persian_Date,Open,Low,High,Close
0,0,11/27/2011,1390/09/06,13700,13700,13700,13700
1,1,11/28/2011,1390/09/07,13440,13440,13440,13440
2,2,11/29/2011,1390/09/08,13350,13350,13350,13350
3,3,11/30/2011,1390/09/09,13400,13400,13400,13400
4,4,12/1/2011,1390/09/10,13500,13500,13500,13500


### Pre Proccessing

In [123]:
# Convert prices from str to int
def str_to_int(x):
    return int(x.replace(",", ""))

# Convert the date from str to Jalali date
def str_to_jdate(persian_date):
    parts = persian_date.split('/')
    return jdatetime.date(int(parts[0]), int(parts[1]), int(parts[2]))

# Convert the solar date to the number of days from a specific date
def date_to_days(persian_date):
    reference_date = jdatetime.date(1300, 1, 1)
    delta = persian_date - reference_date
    return delta.days

data["Persian_Date"] = data["Persian_Date"].apply(str_to_jdate)

data["High"] = data["High"].apply(str_to_int)
data["Low"] = data["Low"].apply(str_to_int)
data["Open"] = data["Open"].apply(str_to_int)
data["Close"] = data["Close"].apply(str_to_int)

data["Days"] = data["Persian_Date"].apply(date_to_days)

data.head()

Unnamed: 0.1,Unnamed: 0,Date,Persian_Date,Open,Low,High,Close,Days
0,0,11/27/2011,1390-09-06,13700,13700,13700,13700,33123
1,1,11/28/2011,1390-09-07,13440,13440,13440,13440,33124
2,2,11/29/2011,1390-09-08,13350,13350,13350,13350,33125
3,3,11/30/2011,1390-09-09,13400,13400,13400,13400,33126
4,4,12/1/2011,1390-09-10,13500,13500,13500,13500,33127


In [124]:
ahmadinejad = data.loc[data["Persian_Date"] < jdatetime.date(1392, 5, 12)].reset_index()
rohani = data.loc[(data["Persian_Date"] >= jdatetime.date(1392, 5, 12)) & (data["Persian_Date"] < jdatetime.date(1400, 5, 12))].reset_index()
raisi = data.loc[(data["Persian_Date"] >= jdatetime.date(1400, 5, 12))].reset_index()

# ahmadinejad
# rohani
raisi.head()

Unnamed: 0.1,index,Unnamed: 0,Date,Persian_Date,Open,Low,High,Close,Days
0,2857,2857,8/3/2021,1400-05-12,255790,255690,258140,257290,36660
1,2858,2858,8/4/2021,1400-05-13,257250,254690,257340,256190,36661
2,2859,2859,8/5/2021,1400-05-14,256490,255790,256740,256190,36662
3,2860,2860,8/7/2021,1400-05-16,254190,253990,257140,256380,36664
4,2861,2861,8/8/2021,1400-05-17,256290,252390,257440,256890,36665


In [125]:
def calculate_growth_rate(highest_price, lowest_price):
    return f"%{round(((highest_price - lowest_price) / lowest_price) * 100, 0)}"

In [126]:
ahmadinejad_high = ahmadinejad['High'].max()
ahmadinejad_low = ahmadinejad['High'].min()
ahmadinejad_growth = calculate_growth_rate(ahmadinejad_high, ahmadinejad_low)

rohani_high = rohani['High'].max()
rohani_low = rohani['High'].min()
rohani_growth = calculate_growth_rate(rohani_high, rohani_low)

raisi_high = raisi['High'].max()
raisi_low = raisi['High'].min()
raisi_growth = calculate_growth_rate(raisi_high, raisi_low)

In [127]:
# Calculation of the highest and lowest price in each period
results = pd.DataFrame({
    "President": ["Ahmadinejad", "Rohani", "Raisi"],
    "Highest Price": [ahmadinejad_high, rohani_high, raisi_high],
    "Lowest Price": [ahmadinejad_low, rohani_low, raisi_low],
    "Growth Rate (%)": [ahmadinejad_growth, rohani_growth, raisi_growth]
})
results

Unnamed: 0,President,Highest Price,Lowest Price,Growth Rate (%)
0,Ahmadinejad,39700,13350,%197.0
1,Rohani,320060,29150,%998.0
2,Raisi,555600,254300,%118.0


In [128]:
def train_and_evaluate(data):
    X = data[["Days"]].values
    y = data["High"].values
    
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    
    model = LinearRegression()
    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_test)
    
    mae = round(mean_absolute_error(y_test, y_pred), 0)
    
    return mae

In [129]:
results = pd.DataFrame({
    "President": ["Ahmadinejad", "Rohani", "Raisi"],
    "MAE": [
        train_and_evaluate(ahmadinejad),
        train_and_evaluate(rohani),
        train_and_evaluate(raisi)
    ],
})
results

Unnamed: 0,President,MAE
0,Ahmadinejad,2820.0
1,Rohani,31766.0
2,Raisi,35573.0
