In [184]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.multioutput import MultiOutputRegressor
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error

## Import data

In [137]:
data = pd.read_csv('MarketPricePrediction.csv')

In [138]:
data.shape

(10227, 10)

## Preprocessing

In [139]:
data.isnull().sum()

market      0
month       0
year        0
quantity    0
priceMin    0
priceMax    0
priceMod    0
state       0
city        0
date        0
dtype: int64

In [140]:
data = data.drop('date', axis = 1)
data.head()

Unnamed: 0,market,month,year,quantity,priceMin,priceMax,priceMod,state,city
0,ABOHAR(PB),January,2005,2350,404,493,446,PB,ABOHAR
1,ABOHAR(PB),January,2006,900,487,638,563,PB,ABOHAR
2,ABOHAR(PB),January,2010,790,1283,1592,1460,PB,ABOHAR
3,ABOHAR(PB),January,2011,245,3067,3750,3433,PB,ABOHAR
4,ABOHAR(PB),January,2012,1035,523,686,605,PB,ABOHAR


In [141]:
data['city'].unique().shape

(117,)

In [142]:
encoder = LabelEncoder()

In [143]:
data['city'] = encoder.fit_transform(data['city'])
data['month'] = encoder.fit_transform(data['month'])
data['market'] = encoder.fit_transform(data['market'])
data['state'] = encoder.fit_transform(data['state'])

In [144]:
data.head()

Unnamed: 0,market,month,year,quantity,priceMin,priceMax,priceMod,state,city
0,0,4,2005,2350,404,493,446,16,0
1,0,4,2006,900,487,638,563,16,0
2,0,4,2010,790,1283,1592,1460,16,0
3,0,4,2011,245,3067,3750,3433,16,0
4,0,4,2012,1035,523,686,605,16,0


### sort values

In [145]:
df = data.sort_values(by = ["year", "month"])

In [146]:
# Créez les nouvelles colonnes
df['prev_quantity'] = df.groupby(['market', 'year'])['quantity'].shift(1)
df['prev_priceMin'] = df.groupby(['market', 'year'])['priceMin'].shift(1)
df['prev_priceMax'] = df.groupby(['market', 'year'])['priceMax'].shift(1)
df['prev_priceMod'] = df.groupby(['market', 'year'])['priceMod'].shift(1)

In [147]:
df['prev_quantity'] = df.groupby(['month', 'market'])['prev_quantity'].fillna(method='ffill')
df['prev_priceMin'] = df.groupby(['month', 'market'])['prev_priceMin'].fillna(method='ffill')
df['prev_priceMax'] = df.groupby(['month', 'market'])['prev_priceMax'].fillna(method='ffill')
df['prev_priceMod'] = df.groupby(['month', 'market'])['prev_priceMod'].fillna(method='ffill')

In [148]:
df = df.dropna()

In [149]:
order = ['market', 'month', 'year', 'prev_quantity', 'prev_priceMin', 'prev_priceMax',
       'prev_priceMod', 'state', 'city', 'quantity', 'priceMin',
       'priceMax', 'priceMod']

In [150]:
df = df[order]

In [151]:
df.head()

Unnamed: 0,market,month,year,prev_quantity,prev_priceMin,prev_priceMax,prev_priceMod,state,city,quantity,priceMin,priceMax,priceMod
5541,68,1,1996,192592.0,136.0,279.0,254.0,14,65,173892,164,388,340
5621,68,2,1996,173892.0,164.0,388.0,340.0,14,65,240615,138,451,385
5420,68,3,1996,240615.0,138.0,451.0,385.0,14,65,196164,133,229,186
5399,68,4,1996,196164.0,133.0,229.0,186.0,14,65,225063,160,257,226
5521,68,5,1996,225063.0,160.0,257.0,226.0,14,65,156282,229,406,368


## Train and test data

In [None]:
X = df.iloc[:,:9]
y = df.iloc[:,9:]

scaler = StandardScaler()
X = scaler.fit_transform(X)

In [153]:
x_train, x_test, y_train, y_test = train_test_split(X,y, test_size = 0.2)

# Model Extra trees

In [157]:
model = MultiOutputRegressor(ExtraTreesRegressor())

In [158]:
model.fit(x_train, y_train)

In [159]:
model.score(x_train, y_train)

1.0

In [160]:
predictions = model.predict(x_test)

In [161]:
predictions

array([[333490.95,    401.6 ,    853.04,    655.1 ],
       [ 46696.8 ,    600.48,   1239.18,    972.52],
       [273831.2 ,    758.7 ,   2013.07,   1469.96],
       ...,
       [ 50568.17,    664.69,   1599.95,   1220.02],
       [ 27008.84,    731.73,   1509.61,   1210.25],
       [ 16478.78,    492.07,    999.84,    778.73]])

## Model evaluation

In [185]:
np.sqrt(mean_squared_error(predictions, y_test))