In [9]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
import warnings
import seaborn as sns
warnings.filterwarnings("ignore")


In [10]:
# Load the raw price records; the path is relative to the notebook's working directory.
df = pd.read_csv('CottonData.csv')
# Preview the first rows to check the column layout.
df.head()

Unnamed: 0,Day,Month,Year,State,District,Market,Price
0,6,2,2015,Andhra Pradesh,Anantapur,Gooti,4150.0
1,7,2,2015,Andhra Pradesh,Anantapur,Gooti,4150.0
2,8,2,2015,Andhra Pradesh,Anantapur,Gooti,4150.0
3,9,2,2015,Andhra Pradesh,Anantapur,Gooti,4150.0
4,10,2,2015,Andhra Pradesh,Anantapur,Gooti,4150.0


In [11]:
# Column dtypes and non-null counts for the raw frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 239458 entries, 0 to 239457
Data columns (total 7 columns):
 #   Column    Non-Null Count   Dtype  
---  ------    --------------   -----  
 0   Day       239458 non-null  int64  
 1   Month     239458 non-null  int64  
 2   Year      239458 non-null  int64  
 3   State     239458 non-null  object 
 4   District  239458 non-null  object 
 5   Market    239458 non-null  object 
 6   Price     239458 non-null  float64
dtypes: float64(1), int64(3), object(3)
memory usage: 12.8+ MB


In [12]:
# (rows, columns) of the raw frame.
df.shape

(239458, 7)

In [13]:
# Smallest and largest recorded price, used to spot implausible values.
print(df['Price'].min())
print(df['Price'].max())

0.0
4120383.0


In [15]:
#Data Cleaning
# Keep only rows whose price lies strictly between 0 and 13000
# (presumably everything outside that band is a data-entry error -- TODO confirm the cap).
# .copy() gives later cells an independent frame to add columns to, rather than
# a filtered slice of the raw data (chained-assignment warnings are silenced globally).
df = df[(df['Price'] > 0) & (df['Price'] < 13000)].copy()

In [14]:
# Price range of whatever `df` currently holds (re-run after cleaning to verify the filter).
print(df['Price'].min())
print(df['Price'].max())

0.0
4120383.0


In [16]:
#DATE MERGING
# Build a 'YYYY-MM-DD' string from the three integer columns (day and month
# are left-padded to two digits), then drop the separate parts.
df['date'] = (
    df['Year'].astype(str)
    + "-" + df['Month'].astype(str).str.zfill(2)
    + "-" + df['Day'].astype(str).str.zfill(2)
)
df = df.drop(columns=['Day', 'Month', 'Year'])
df.head()

Unnamed: 0,State,District,Market,Price,date
0,Andhra Pradesh,Anantapur,Gooti,4150.0,2015-02-06
1,Andhra Pradesh,Anantapur,Gooti,4150.0,2015-02-07
2,Andhra Pradesh,Anantapur,Gooti,4150.0,2015-02-08
3,Andhra Pradesh,Anantapur,Gooti,4150.0,2015-02-09
4,Andhra Pradesh,Anantapur,Gooti,4150.0,2015-02-10


In [17]:
# Distinct state names present in the data.
df['State'].unique()

array(['Andhra Pradesh', 'Gujarat', 'Haryana', 'Karnataka',
       'Madhya Pradesh', 'Maharashtra', 'Meghalaya', 'Odisha',
       'Pondicherry', 'Punjab', 'Rajasthan', 'Tamil Nadu', 'Telangana',
       'Uttar Pradesh'], dtype=object)

In [None]:
# Frame in Prophet's naming convention: `ds` is the timestamp and `y` the target;
# the three location columns are carried along to be encoded as regressors.
df_prop = pd.DataFrame({
    'ds': pd.to_datetime(df["date"]),
    'y': df["Price"],
    'State': df["State"],
    'District': df["District"],
    'Market': df["Market"],
})
df_prop.head()

In [None]:
# Encoding Categorical Columns
# Step 1: switch the three location columns to pandas' category dtype.
df_prop = df_prop.astype({'State': 'category', 'District': 'category', 'Market': 'category'})
# Step 2: expose each category's integer code as a new <name>_Code column.
df_prop = df_prop.assign(
    State_Code=df_prop['State'].cat.codes,
    District_Code=df_prop['District'].cat.codes,
    Market_Code=df_prop['Market'].cat.codes,
)
df_prop.head()

In [11]:
#making dictionaries for categorical attributes
# name -> integer code lookups, built before the text columns are dropped so
# that user-supplied names can be translated back to codes later.
state_dict = dict(zip(df_prop['State'], df_prop['State_Code']))
district_dict = dict(zip(df_prop['District'], df_prop['District_Code']))
market_dict = dict(zip(df_prop['Market'], df_prop['Market_Code']))
df_prop = df_prop.drop(columns=['State', 'District', 'Market'])
df_prop.head()

Unnamed: 0,ds,y,State_Code,District_Code,Market_Code
0,2015-02-06,4150.0,0,11,202
1,2015-02-07,4150.0,0,11,202
2,2015-02-08,4150.0,0,11,202
3,2015-02-09,4150.0,0,11,202
4,2015-02-10,4150.0,0,11,202


In [12]:
# Show the state-name -> code mapping.
print(state_dict)

{'Andhra Pradesh': 0, 'Gujarat': 1, 'Haryana': 2, 'Karnataka': 3, 'Madhya Pradesh': 4, 'Maharashtra': 5, 'Meghalaya': 6, 'Odisha': 7, 'Pondicherry': 8, 'Punjab': 9, 'Rajasthan': 10, 'Tamil Nadu': 11, 'Telangana': 12, 'Uttar Pradesh': 13}


In [None]:
# Positional hold-out rather than a random train_test_split (roughly 80 - 20):
# the final 48000 rows are kept for testing, everything before them for training.
train = df_prop.iloc[:-48000]
test = df_prop.iloc[-48000:]

In [None]:
# The only other import of Prophet sits near the end of the notebook, so a
# fresh kernel would hit a NameError here; bring it into scope before first use.
from fbprophet import Prophet

# Prophet with daily seasonality plus the three location codes as extra regressors.
# NOTE(review): the codes are arbitrary category indices, so treating them as
# numeric regressors imposes an ordering on states/districts/markets -- confirm this is intended.
prophet_model = Prophet(daily_seasonality = True)
for regressor_name in ('State_Code', 'District_Code', 'Market_Code'):
    prophet_model.add_regressor(regressor_name)
prophet_model.fit(train)

In [None]:
#Model Saving / retrieval
import pickle
from pathlib import Path

# Cache file for the fitted model, relative to the working directory.
MODEL_PATH = Path('model_train_test.pckl')

if MODEL_PATH.exists():
    # Reuse the previously saved model.
    # NOTE: pickle.load can execute arbitrary code -- only load files you created yourself.
    with MODEL_PATH.open('rb') as fin:
        prophet_model = pickle.load(fin)
else:
    # First run: persist the model fitted above so later runs can reuse it.
    with MODEL_PATH.open('wb') as fout:
        pickle.dump(prophet_model, fout)

In [None]:
# Features only: remove the target column before asking the model for predictions.
test_wihtout_label = test.drop(columns=['y'])

In [None]:
# Predict the held-out rows.
# NOTE(review): Prophet appears to return its rows ordered by `ds`; verify row
# alignment before comparing against `test['y']`, which keeps the file order.
test_pred = prophet_model.predict(test_wihtout_label)

In [None]:
# sklearn metrics
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import max_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score

# Prophet.predict() sorts the frame it is given by `ds` and resets the index,
# while `test` is still in file order (grouped by market). Put the labels in
# the same order so every y is compared with its own yhat.
# NOTE(review): the stable sort mirrors what current Prophet releases do;
# confirm against the installed version if many rows share a date.
y_true = test.sort_values('ds', kind='mergesort')['y'].reset_index(drop=True)
y_pred = test_pred['yhat']

print( "Mean Absolute Error is", mean_absolute_error(y_true, y_pred)) 
# print( "Max Error is", max_error(y_true, y_pred)) 
# print( "Mean Squared Error is", mean_squared_error(y_true, y_pred)) 
print( "R2 score is",r2_score(y_true, y_pred)) # best is 1.0; 0 matches predicting the mean, negative is worse

In [None]:
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error between two sequences.

    Both inputs are converted to NumPy arrays and compared position by
    position (any pandas index is ignored). The result is expressed in
    percent, i.e. mean(|(true - pred) / true|) * 100.

    Entries of ``y_true`` equal to zero lead to a division by zero, so the
    result is inf/nan in that case.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    relative_error = np.abs((actual - predicted) / actual)
    return relative_error.mean() * 100

# Prophet.predict() returns its rows ordered by `ds`, whereas `test` is still in
# file order; sort the labels the same (stable) way so each actual price is
# compared with its own prediction rather than with an unrelated row.
mean_absolute_percentage_error(test.sort_values('ds', kind='mergesort')['y'], test_pred['yhat'])

In [None]:
# Plot the predictions for the held-out rows.
fig1 = prophet_model.plot(test_pred)

In [None]:
# Dates to forecast: 90 periods appended by make_future_dataframe.
future = prophet_model.make_future_dataframe(periods=90)
# The model was fitted with three extra regressors, and Prophet refuses to
# predict on a frame that lacks them. Forecast for one concrete market by
# filling its codes on every row (same market as the single-day example).
future['State_Code'] = state_dict['Andhra Pradesh']
future['District_Code'] = district_dict['Anantapur']
future['Market_Code'] = market_dict['Gooti']

In [None]:
# One hand-written feature row (date plus the three location codes, all as
# strings) wrapped in a frame with the column names used by the model.
test_data = [
    ['2022-02-14', '0', '11', '202'],
]
df_test = pd.DataFrame(
    data=test_data,
    columns=['ds', 'State_Code', 'District_Code', 'Market_Code'],
)
df_test.head()

In [None]:
# Preview the dates that will be forecast.
future.head()

In [None]:
# Number of rows in the training split.
len(train)

In [None]:
# Forecast over `future`.
# NOTE(review): the model registers State_Code/District_Code/Market_Code via
# add_regressor, so `future` presumably has to contain those columns -- confirm.
forecast = prophet_model.predict(future)

In [None]:
# Plot the forecast.
fig1 = prophet_model.plot(forecast)

In [None]:
# Inspect the dtypes of the hand-built prediction row.
df_test.info()

In [None]:
# Same hand-written row as before, but for a date near the start of the data.
test_data = [
    ['2015-02-06', '0', '11', '202'],
]
df_test = pd.DataFrame(
    data=test_data,
    columns=['ds', 'State_Code', 'District_Code', 'Market_Code'],
)
df_test.head()

In [None]:
# Predict the single hand-built row.
forecast_test = prophet_model.predict(df_test)

In [None]:
# Display the prediction frame for the single row.
forecast_test

In [None]:
# Plot the single-row prediction.
fig1 = prophet_model.plot(forecast_test)

In [None]:
# Single Day Price Prediction

import datetime
input_state = "Andhra Pradesh"
input_district = "Anantapur"
input_market = "Gooti"
input_date = "15-03-15"  # day-month-year with a two-digit year

# Translate the names into the integer codes the model was trained on
# (raises KeyError for a name that is not in the data).
State_Code = state_dict[input_state]
District_Code = district_dict[input_district]
Market_Code = market_dict[input_market]

# Parse straight to a timestamp; no round trip through a formatted string.
ds = pd.Timestamp(datetime.datetime.strptime(input_date, "%d-%m-%y"))

input_data = [[ds, State_Code, District_Code, Market_Code]]
df_pred = pd.DataFrame(input_data, columns = ['ds', 'State_Code', 'District_Code', 'Market_Code'])

# The codes stay plain integers, matching the numeric regressor columns used
# for training; casting them to category dtype added nothing.
pred_result = prophet_model.predict(df_pred)

In [None]:
# Week Price Prediction
def get_week_prediction(input_state, input_district, input_market, start_date=None, days=7):
    """Predict daily prices for one market over a run of consecutive days.

    Parameters
    ----------
    input_state, input_district, input_market : str
        Names as they appear in the data; they are translated to integer
        codes through ``state_dict``, ``district_dict`` and ``market_dict``.
    start_date : datetime.date or anything ``pd.date_range`` accepts, optional
        First day to predict. Defaults to tomorrow, which is what the
        function always used before this parameter existed.
    days : int, optional
        Number of consecutive days to predict (default 7).

    Returns
    -------
    Whatever ``prophet_model.predict`` returns for the constructed frame,
    one row per requested day.

    Raises
    ------
    KeyError
        If a state, district or market name is not in its lookup dictionary.
    """
    from datetime import date, timedelta

    if start_date is None:
        start_date = date.today() + timedelta(days=1)  # start the window tomorrow

    # One row per day; the location codes are the same on every row and are
    # kept as plain integers, matching the numeric regressors used in training.
    df_pred = pd.DataFrame({'ds': pd.date_range(start=start_date, periods=days, freq='D')})
    df_pred['State_Code'] = state_dict[input_state]
    df_pred['District_Code'] = district_dict[input_district]
    df_pred['Market_Code'] = market_dict[input_market]

    return prophet_model.predict(df_pred)

In [None]:
# Predictions for the Gooti market (Anantapur district, Andhra Pradesh).
week_prediction_result = get_week_prediction("Andhra Pradesh","Anantapur", "Gooti")

In [None]:
# Display the prediction frame returned above.
week_prediction_result

In [None]:
#LINEAR REGRESSION

In [None]:
# One-hot features for the linear model.
# The date-merging cell replaces Day/Month/Year with a single `date` string,
# so recover the calendar parts from `date` when the raw columns are gone.
if {'Day', 'Month', 'Year'}.issubset(df.columns):
    calendar = df[['Day', 'Month', 'Year']]
else:
    parsed_dates = pd.to_datetime(df['date'])
    calendar = pd.DataFrame(
        {'Day': parsed_dates.dt.day, 'Month': parsed_dates.dt.month, 'Year': parsed_dates.dt.year},
        index=df.index,
    )

# Prefixes keep every dummy column name a unique string; without them the
# day columns 1..12 collide with the month columns 1..12 and integer names
# are mixed with the string state/district names.
dummies_state = pd.get_dummies(df.State, prefix='State')
dummies_day = pd.get_dummies(calendar.Day, prefix='Day')
dummies_month = pd.get_dummies(calendar.Month, prefix='Month')
dummies_year = pd.get_dummies(calendar.Year, prefix='Year')
dummies_district = pd.get_dummies(df.District, prefix='District')
merged = pd.concat([df,dummies_day,dummies_month,dummies_year,dummies_state,dummies_district],axis='columns')
# Drop every raw (non-encoded) column that is present, including the `date`
# string; Market is dropped without being encoded, as before.
final = merged.drop(columns=['Day','Month','Year','State','District','Market','date'], errors='ignore')
X = final.drop('Price',axis='columns')
y = final.Price

In [None]:
from sklearn.linear_model import LinearRegression
# NOTE(review): `model` is not referenced by the later cells shown here; the
# fitted estimator is the separate `regressor` instance created below.
model = LinearRegression()

In [None]:
# Random 80/20 split with a fixed seed so the partition is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Fit an ordinary least-squares model on the training split.
regressor = LinearRegression()  
regressor.fit(X_train, y_train)

In [None]:
# Predict the held-out rows and pair each prediction with the actual price.
y_pred = regressor.predict(X_test)
df_ = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
# First 25 pairs, kept for inspection (not displayed by this cell).
df1 = df_.head(25)

In [None]:
# For a regressor, score() is the coefficient of determination (R^2) on the
# given data, not a classification accuracy.
accuracy = regressor.score(X_test,y_test)
print(accuracy)

In [None]:
#COMBINING DATE & PROPHET

In [None]:
# Left-pad the day to two characters.
# NOTE(review): the earlier date-merging cell drops 'Day', 'Month' and 'Year',
# so on a top-to-bottom run this column no longer exists here.
df['Day']=df['Day'].apply(lambda x: '{0:0>2}'.format(x))

In [None]:
# Left-pad the month to two characters.
# NOTE(review): 'Month' is dropped by the earlier date-merging cell.
df['Month']=df['Month'].apply(lambda x: '{0:0>2}'.format(x))

In [None]:
# Check the padded values.
df.head()

In [None]:
# Convert the three date parts to strings so they can be concatenated.
# NOTE(review): these columns are dropped by the earlier date-merging cell.
df['Year'] = df['Year'].apply(str)
df['Day']=df['Day'].apply(str)
df['Month']=df['Month'].apply(str)

In [None]:
# Start the date string as 'Year-Month'.
df['date'] = df['Year'].str.cat(df['Month'], sep ="-")

In [None]:
# Append '-Day' to complete the date string.
df['date'] = df['date'].str.cat(df['Day'], sep ="-")

In [None]:
# Check the assembled date column.
df.head()

In [None]:
# `df2` is never created by any earlier cell, so a fresh kernel fails with a
# NameError here. Start it as an independent copy of the date-merged frame,
# which carries the `date` and `Price` columns selected in the next cell.
df2 = df.copy()
df2.head()

In [None]:
# Keep only the timestamp and the target.
df2 = df2[['date', 'Price']]
df2.head()
# len(df2)

In [None]:
# Rename to `ds` (date) and `y` (target), the column names Prophet is fitted on below.
df2 = df2.rename(columns={"date": "ds", "Price": "y"})

In [None]:
# Check the renamed frame.
df2.head()

In [None]:
# Largest price in the series.
max(df2['y'])

In [None]:
# Show the row(s) holding the maximum price.
print(df2[df2.y == df2.y.max()]) 

In [None]:
# Remove the outlier row by its index label (presumably the maximum printed
# above -- TODO confirm). errors='ignore' keeps the cell from failing when the
# price filter in the data-cleaning cell has already removed that row.
df2 = df2.drop(46582, errors='ignore')

In [None]:
# Show the row(s) holding the maximum price after the drop.
print(df2[df2.y == df2.y.max()]) 

In [None]:
from fbprophet import Prophet

In [None]:
# Univariate model: only `ds` and `y`, no extra regressors.
m = Prophet(daily_seasonality = True) # the Prophet class (model)
m.fit(df2) # fit the model using all data

In [None]:
# Forecast 365 periods ahead and plot the result.
future = m.make_future_dataframe(periods=365) #we need to specify the number of days in future
prediction = m.predict(future)
m.plot(prediction)
# Labels describe the cotton price data actually being modelled (they were
# previously copied from a stock-price example).
plt.title("Prediction of the Cotton Price using Prophet")
plt.xlabel("Date")
plt.ylabel("Price")
plt.ylim([0, 15000]) # prices were capped below 13000 during cleaning
plt.show()

In [None]:
# Plot the individual components of the forecast.
m.plot_components(prediction)
plt.show()