In [None]:
# -------------------- 18011016 - Berk Kıras; 18011020 - Alper Reha Yazgan
import os
import pickle
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn.metrics as sm
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import VotingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVR


def read_csv_file_to_pdframe(filepath : str) -> pd.DataFrame:
    """
    Load a comma-separated file into a pandas DataFrame.

    Args:
        filepath: Path of the CSV file to read.

    Returns:
        The parsed table; the first row of the file supplies the column names.
    """
    # low_memory=False makes pandas parse the file in one pass rather than in
    # chunks, which avoids mixed-dtype columns on large files.
    return pd.read_csv(filepath, low_memory=False, header=0, sep=',')

# CSV file names; they are joined with the data directory when the files are loaded.
DATA_KASAHAREKET = "KASAHAREKET.csv"  # not loaded anywhere in the visible notebook
DATA_URUNHAREKET = "URUNHAREKET.csv"  # loaded into sales_df
DATA_URUNLER = "URUNLER.csv"  # loaded into products_df

In [None]:
# -------------------- Read CSV Files
# The CSV files are expected next to the notebook (current working directory).
working_dir = os.getcwd()
data_dir = os.path.join(working_dir, '.')

sales_csv_path = os.path.join(data_dir, DATA_URUNHAREKET)
products_csv_path = os.path.join(data_dir, DATA_URUNLER)

sales_df = read_csv_file_to_pdframe(sales_csv_path)
products_df = read_csv_file_to_pdframe(products_csv_path)

In [None]:
# -------------------- Data Preprocessing

# -------------------- Sales Table
# Drop the listed columns from the raw sales table.
sales_df = sales_df.drop(columns=['MUSTERIKODU','BIRIMKODU','DEPO','DEPO2','AKDVDH','SKDVDH','SUBENO','ISKONTO1','ISKONTO2','ISKONTO3','ISKONTO4','ISKONTO5','PUANTAJ','URUNOZELLIKLERI','KDVORAN','BARKOD','DOVIZKUR','DOVIZFIYAT'])

# TARIH is stored as text in day.month.year hour:minute:second form.
sales_df_dates = pd.to_datetime(sales_df['TARIH'], format='%d.%m.%Y %H:%M:%S')

sales_df = (
    sales_df
    .assign(
        TARIH=sales_df_dates,
        # BFIYAT uses a decimal comma; swap it for a dot before casting to float.
        BFIYAT=sales_df['BFIYAT'].str.replace(',', '.').astype(float),
    )
    # Keep only the rows whose HTUR is 'C'.
    .loc[lambda frame: frame['HTUR'] == 'C']
    # Newest rows first; ties are ordered by descending URUNREF.
    .sort_values(by=['TARIH', 'URUNREF'], ascending=False)
)


# -------------------- Products Table
# Drop the listed columns from the product catalogue. The original list named
# 'SKDVDH' twice; the redundant entry is removed (each label is dropped once).
products_df = products_df.drop(columns=['KDURUM','GRUPKODU','BIRIM','SKDVDH','ALTKATEGORI','ImageName1','ImageName2','ImageName3','ImageName4','ImageName5','DOVIZSFIYAT','DOVIZLIFIYATKULLAN','ALICIKODU','YSURUNKODU','YAZICIGRUBU','FOTOGRAF','HSG','MODEL','URETICIKODU','SATICIKODU','URUNTIPI','RENKBEDENOZELLIK','ETICARETURUN','ANAKATEGORI','HSGSIRA','SFIYAT5KDVDH','OZELLIKDURUM','OTOKARHESAPLA','GENELOZELKAR','KARORANI','BIRIMKODU','HSGKOD','OZELKOD','AKDVDH','UACIKLAMA','SFIYAT2','SFIYAT2KDVDH','UINDIRIMORAN','AKTIFFIYAT','PUANTAJ','DARA','LOKASYON','URUNGRAMAJ','URUNBFIYAT','MENSEI','ESKISFIYAT','ESKISFIYAT2','ESKISFIYAT3','SFIYAT3','SFIYAT3KDVDH','ESKISFIYAT4','SFIYAT4','SFIYAT4KDVDH','ESKISFIYAT5','SFIYAT5','MARKA','KATEGORI','DOVIZAFIYAT','DOVIZKUR'])

# Catalogue row(s) whose LOGICALREF is 67068, the same product id that the
# sales table is filtered on (via URUNREF) further down.
products_df_pilav = products_df[products_df['LOGICALREF'] == 67068]

products_df

Unnamed: 0,LOGICALREF,URUNKODU,URUNACIKLAMASI,KDV,SFIYAT,AFIYAT
0,67470,ST00000773,Nurlular A?urelik Bu?day 1KG,8,700,560
1,67472,ST00000775,?nc? limon kolonyas? 80cc,0,2750,2500
2,67474,ST00000777,?ar??ba?? Beyaz Peynir 800gr,0,2200,1770
3,67476,ST00000779,Elidor sa? k?remi 500ml,0,1500,1200
4,67477,ST00000780,S?ta? tereyag 100gr,0,595,500
...,...,...,...,...,...,...
2936,67465,ST00000768,Tekfen Enerji tasarruflu ampul,0,1400,1000
2937,67464,ST00000767,Bella House b?ua??k teli,18,1000,775
2938,67466,ST00000769,Han?meller Asorti 170 gr,0,400,260
2939,67468,ST00000771,ROUXS El Temizleme Jeli 250ml,0,1650,1200


In [None]:
# -------------------- Data Preprocessing

# -------------------- Group By FISNO
# One row per bill number (FISNO) holding the list of URUNREF values on that bill.
sales_df_bills = sales_df.groupby('FISNO')['URUNREF'].apply(list).reset_index(name="products")

# -------------------- URUNREF
# Sales rows of product 67068. .copy() detaches the selection from sales_df so
# that the column assignment done on it later operates on an independent frame
# instead of a slice (which is what raises pandas' SettingWithCopyWarning).
sales_df_pilav = sales_df[sales_df['URUNREF'] == 67068].copy()

sales_df_pilav

Unnamed: 0,LOGICALREF,FISNO,URUNREF,URUNKODU,TARIH,GMIKTAR,CMIKTAR,HTUR,BFIYAT,ISKONTO,KDV,TOPLAM
192127,231440,FS00102468,67068,ST00000371,2021-11-25 10:07:20,0,100,C,20.0,0,148,2000
179885,219300,FS00095363,67068,ST00000371,2021-10-07 11:15:51,0,200,C,18.0,0,267,3600
177000,216379,FS00093686,67068,ST00000371,2021-09-27 11:30:59,0,200,C,18.0,0,267,3600
166979,206230,FS00087924,67068,ST00000371,2021-08-25 12:11:54,0,100,C,18.0,0,133,1800
102041,200601,FS00084659,67068,ST00000371,2021-08-09 10:16:31,0,100,C,18.0,0,133,1800
103924,199276,FS00083884,67068,ST00000371,2021-08-05 11:09:55,0,200,C,18.0,0,267,3600
104967,198426,FS00083398,67068,ST00000371,2021-08-02 18:29:24,0,100,C,18.0,0,133,1800
106345,197519,FS00082858,67068,ST00000371,2021-07-31 09:07:32,0,100,C,18.0,0,133,1800
112535,192726,FS00080150,67068,ST00000371,2021-07-18 18:36:16,0,100,C,18.0,0,133,1800
116389,189736,FS00078774,67068,ST00000371,2021-07-13 11:16:40,0,200,C,18.0,0,267,3600


## Recommendation

In [None]:
# Display the per-bill table (FISNO -> list of product references) used by the recommender.
sales_df_bills

Unnamed: 0,FISNO,products
0,DV00000001,[42384]
1,DV00000002,[42438]
2,DV00000003,[42663]
3,DV00000004,[42913]
4,DV00000005,[43837]
...,...,...
94613,FS00109221,[51295]
94614,FS00109222,"[68558, 68431, 68430, 67638, 66776, 66709]"
94615,FS00109223,[48477]
94616,FS00109224,[48990]


In [None]:
# Product ids chosen by the user; the selection cell below appends to this list.
# Re-running this cell clears the selection.
item_ids = []

In [None]:
Item_ID =  66776#@param {type:"raw"}
# Run this cell once per product to select. The membership check keeps a
# re-run with the same id from queueing that product twice.
if Item_ID not in item_ids:
    item_ids.append(Item_ID)
print(item_ids)

[66776]


In [None]:
def unique(list1):
    """
    Return the distinct elements of ``list1`` in first-seen order.

    Args:
        list1: Any iterable of values.

    Returns:
        A new list holding each distinct element once, ordered by first
        occurrence. The input is not modified.
    """
    # Materialise once so the fallback below sees every element even when the
    # argument is a one-shot iterator.
    items = [*list1]
    try:
        # dict keys are unique and keep insertion order, giving an O(n)
        # de-duplication instead of the quadratic "scan the result list" loop.
        return [*dict.fromkeys(items)]
    except TypeError:
        # Unhashable elements (e.g. nested lists) cannot be dict keys; fall
        # back to the equality-based linear scan.
        unique_list = []
        for x in items:
            if x not in unique_list:
                unique_list.append(x)
        return unique_list

In [None]:
# Accumulator for product ids taken from bills that contain a selected item.
# It may hold duplicates; they are removed with unique() after the search loop.
recommended_ids = []

In [None]:
# Found items -> related items: gather the products bought together with each
# selected item.
for item_id in item_ids:
    # The loop variable is deliberately NOT called `list`: rebinding that name
    # at notebook level breaks every later use of the builtin (for example the
    # `.apply(list)` in the bill-grouping cell when it is re-run).
    for bill_products in sales_df_bills['products']:
        # A single-product bill carries no co-purchase information.
        if len(bill_products) > 1 and item_id in bill_products:
            # Take every product on the bill. The previous version hit `break`
            # right after the first append, so only the first product of each
            # matching bill was ever recorded.
            # NOTE(review): the selected item itself is part of each matching
            # bill and therefore stays in the result; filter it out here if it
            # should not be recommended.
            recommended_ids.extend(bill_products)
unique_list = unique(recommended_ids)
print(unique_list)



[66827, 66776, 66957, 67158, 67058, 67235, 67004, 67022, 67152, 66826, 67248, 67200, 67249, 67354, 67356, 67180, 67177, 67169, 67201, 67347, 67382, 67209, 67345, 67383, 67397, 66956, 67322, 67412, 67416, 67203, 67419, 67285, 67430, 67049, 67443, 67174, 67282, 67340, 67156, 66829, 67431, 67421, 67212, 67166, 67457, 67469, 67470, 67052, 67428, 66791, 67478, 66987, 67449, 67204, 67385, 67507, 67051, 67517, 67519, 67255, 67241, 67503, 67473, 67515, 66965, 67655, 67612, 67607, 67560, 67676, 67374, 67579, 67615, 67691, 67665, 67711, 67715, 67483, 67727, 67722, 67198, 67705, 67739, 67702, 67642, 67183, 67552, 67669, 67349, 67752, 67768, 67317, 67674, 67648, 67707, 67637, 67286, 67719, 67488, 67745, 67054, 67521, 67799, 67154, 67697, 66998, 67682, 67726, 67187, 67798, 67638, 67045, 67701, 67343, 67856, 67155, 67850, 67500, 67192, 67680, 67889, 67956, 67968, 67514, 67961, 67921, 67964, 67940, 67917, 67980, 68613, 68593, 68623, 67467, 68619, 67208, 68560, 68647, 68588, 68618, 68636, 67771, 68279

In [None]:
# Randomly pick up to 10 recommendations.
# random.sample draws without replacement, so a product cannot be listed twice,
# and it never indexes past the end of the list. The previous
# random.randint(1, len(unique_list)) is inclusive on both ends, so it could
# raise IndexError and could never select the first element.
list_recommended = random.sample(unique_list, k=min(10, len(unique_list)))

print(list_recommended)

[67249, 66987, 68647, 68747, 67156, 68860, 67917, 66829, 68772, 68747]


In [None]:
# Print the product description (URUNACIKLAMASI) for every recommended id.
for recommended_id in list_recommended:
    matches_id = products_df['LOGICALREF'] == recommended_id
    products_df_prnt = products_df[matches_id]
    print(products_df_prnt['URUNACIKLAMASI'])

2752    Eti cikolatal? gofret 34 gr
Name: URUNACIKLAMASI, dtype: object
2483    Oylum Gofret 400g Vanilyal?
Name: URUNACIKLAMASI, dtype: object
1154    ?lker yupo ?ampiyon 80 ge
Name: URUNACIKLAMASI, dtype: object
505    ?ark vital bal 700gr
Name: URUNACIKLAMASI, dtype: object
2624    ?ahin Fermente Sucuk 500g
Name: URUNACIKLAMASI, dtype: object
563    Eker bardak ayran 170ml
Name: URUNACIKLAMASI, dtype: object
1002    ?lker ?ikolata antep f?st?k 
Name: URUNACIKLAMASI, dtype: object
2336    Albeni At??t?rmal?k Extra 170g
Name: URUNACIKLAMASI, dtype: object
838    Baby turco cep mendili 3 l?
Name: URUNACIKLAMASI, dtype: object
505    ?ark vital bal 700gr
Name: URUNACIKLAMASI, dtype: object


In [None]:
# -------------------- Data Preprocessing

# CMIKTAR uses a decimal comma; convert it to float.
# - .assign() builds a new frame instead of writing into a slice of sales_df,
#   which is what triggered pandas' SettingWithCopyWarning.
# - .astype(str) makes the conversion repeatable: on a second run the column is
#   already float and a bare .str accessor would raise.
sales_df_pilav = sales_df_pilav.assign(
    CMIKTAR=sales_df_pilav['CMIKTAR'].astype(str).str.replace(',', '.', regex=False).astype(float)
)

# Features: row reference (LOGICALREF) and CMIKTAR; target: BFIYAT.
# NOTE(review): LOGICALREF looks like a row identifier rather than a real
# predictor - confirm that it is meant to be a feature.
X = sales_df_pilav[['LOGICALREF', 'CMIKTAR']].to_numpy(dtype=float)
# y keeps the column-vector shape (n_samples, 1) produced by the double brackets.
y = sales_df_pilav[['BFIYAT']].to_numpy(dtype=float)

X

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  This is separate from the ipykernel package so we can avoid doing imports until


array([[2.31440e+05, 1.00000e+00],
       [2.19300e+05, 2.00000e+00],
       [2.16379e+05, 2.00000e+00],
       [2.06230e+05, 1.00000e+00],
       [2.00601e+05, 1.00000e+00],
       [1.99276e+05, 2.00000e+00],
       [1.98426e+05, 1.00000e+00],
       [1.97519e+05, 1.00000e+00],
       [1.92726e+05, 1.00000e+00],
       [1.89736e+05, 2.00000e+00],
       [1.86891e+05, 1.00000e+00],
       [1.84656e+05, 1.00000e+00],
       [1.77178e+05, 1.00000e+00],
       [1.72050e+05, 1.00000e+00],
       [1.69551e+05, 2.00000e+00],
       [1.69079e+05, 1.00000e+00],
       [1.63792e+05, 2.00000e+00],
       [1.60405e+05, 1.00000e+00],
       [1.60231e+05, 1.00000e+00],
       [1.59534e+05, 1.00000e+00],
       [1.59303e+05, 1.00000e+00],
       [1.58799e+05, 1.00000e+00],
       [1.58737e+05, 1.00000e+00],
       [1.57766e+05, 2.00000e+00],
       [1.45559e+05, 2.00000e+00],
       [1.32133e+05, 1.00000e+00],
       [1.31517e+05, 2.00000e+00],
       [1.30482e+05, 1.00000e+00],
       [1.26995e+05,

In [None]:
# -------------------- LINEAR REGRESSION

# Hold out 25% of the rows for evaluation. The fixed random_state makes the
# split - and every score computed from it in the following cells - the same
# on each run; without it the reported numbers change from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

# Single instantiation (the estimator used to be created twice, the first
# instance being discarded unused).
lr = LinearRegression().fit(X_train, y_train)

y_pred = lr.predict(X_test)
r2_score_lr = sm.r2_score(y_test, y_pred)
exp_var_score = sm.explained_variance_score(y_test, y_pred)

In [None]:
# -------------------- LINEAR REGRESSION RESULTS
# Labels now name the metrics actually computed: r2_score_lr comes from
# sm.r2_score and exp_var_score from sm.explained_variance_score.
# exp_var_score and y_pred are overwritten by later model cells, so run this
# cell right after the linear-regression cell.
print("R^2 score: ", r2_score_lr)
print("explained variance score: ", exp_var_score)
print("predictions: ", y_pred)

realistic score:  0.5553856884059805
error deviations:  0.602534495128771
predictions:  [[16.44122163]
 [18.35567794]
 [14.3651547 ]
 [16.81520402]
 [17.09621364]
 [16.82788289]
 [16.70009148]
 [16.84557198]]


In [None]:
# -------------------- GRADIENT BOOSTING REGRESSION
regr = GradientBoostingRegressor(n_estimators=1000, random_state=0)
# The estimator expects a 1-D target of shape (n_samples,). np.ravel flattens
# a column vector (and leaves an already flat array unchanged), which avoids
# scikit-learn's DataConversionWarning.
regr.fit(X_train, np.ravel(y_train))

y_pred = regr.predict(X_test)
# Regressor .score() returns the coefficient of determination (R^2).
score = regr.score(X_test, y_test)

  y = column_or_1d(y, warn=True)


In [None]:
# -------------------- GRADIENT BOOSTING REGRESSION RESULTS
# `score` is the value returned by regr.score(X_test, y_test) (R^2 for
# scikit-learn regressors). `score` and `y_pred` are reassigned by later model
# cells, so run this cell right after the gradient-boosting cell.
print(score)
print(y_pred)

0.7480314931758996
[16.00000001 17.99999999 15.00000004 16.00000001 16.00000001 16.00000001
 16.00000001 16.00000001]


In [None]:
# -------------------- RANDOM FOREST REGRESSION
regr = RandomForestRegressor(n_estimators = 500, max_depth=600, random_state=42)
# The estimator expects a 1-D target of shape (n_samples,). np.ravel flattens
# a column vector (and leaves an already flat array unchanged), which avoids
# scikit-learn's DataConversionWarning.
regr.fit(X_train, np.ravel(y_train))
y_pred = regr.predict(X_test)
# Regressor .score() returns the coefficient of determination (R^2).
score = regr.score(X_test, y_test)

  This is separate from the ipykernel package so we can avoid doing imports until


In [None]:
# -------------------- RANDOM FOREST REGRESSION RESULTS
# `score` is the value returned by regr.score(X_test, y_test) (R^2 for
# scikit-learn regressors). `score` and `y_pred` are reassigned by other model
# cells, so run this cell right after the random-forest cell.
print(score)
print(y_pred)

0.7451648503937007
[16.    18.    15.126 16.004 16.172 16.004 16.    16.004]


In [None]:
# -------------------- SVR - SUPPORT VECTOR REGRESSION
# https://scikit-learn.org/stable/modules/generated/sklearn.svm.SVR.html
# NOTE(review): RBF-kernel SVR is sensitive to feature scale and the features
# are passed unscaled; consider make_pipeline(StandardScaler(), SVR(...)),
# both of which are already imported at the top of the notebook.
svr = SVR(kernel = 'rbf')
# The estimator expects a 1-D target of shape (n_samples,). np.ravel flattens
# a column vector (and leaves an already flat array unchanged), which avoids
# scikit-learn's DataConversionWarning.
svr.fit(X_train, np.ravel(y_train))

y_pred = svr.predict(X_test)
r2_score_svr = sm.r2_score(y_test, y_pred)
# Overwrites the exp_var_score value computed in the linear-regression cell.
exp_var_score = sm.explained_variance_score(y_test, y_pred)


  y = column_or_1d(y, warn=True)


In [None]:
# https://kadirguzel.medium.com/kolektif-%C3%B6%C4%9Frenme-ve-bagging-algoritmas%C4%B1-e8ea3d932b72#:~:text=Kolektif%20%C3%96%C4%9Frenme%20de%20ise%20tek,T%C3%BCm%20makine%20%C3%B6%C4%9Frenmesi%20problemlerinde%20uygulanabilir.
# -------------------- SVR - SUPPORT VECTOR REGRESSION RESULTS
# Prints, in order: the R^2 score (sm.r2_score), the explained variance score
# (sm.explained_variance_score) and the SVR predictions for X_test.
# exp_var_score and y_pred are shared with other model cells, so run this cell
# right after the SVR cell.
print(r2_score_svr)
print(exp_var_score)
print(y_pred)



0.5120774304383588
0.6053725485183182
[16.45855654 18.23891415 15.09480559 16.82744569 17.19036087 16.96611394
 16.79860644 16.8673589 ]


In [None]:
# -------------------- VOTING REGRESSION

# Ensemble that averages the predictions of a gradient-boosting model and a
# random forest.
reg1 = GradientBoostingRegressor(n_estimators=1000, random_state=0)
reg2 = RandomForestRegressor(n_estimators = 1000, max_depth=100, random_state=0)
ereg = VotingRegressor(estimators=[('gb', reg1), ('rf', reg2)])
# The estimator expects a 1-D target of shape (n_samples,). np.ravel flattens
# a column vector (and leaves an already flat array unchanged), which avoids
# scikit-learn's DataConversionWarning.
ereg.fit(X_train, np.ravel(y_train))
y_pred = ereg.predict(X_test)
# Regressor .score() returns the coefficient of determination (R^2).
score = ereg.score(X_test, y_test)

  y = column_or_1d(y, warn=True)


In [None]:
# -------------------- VOTING REGRESSION RESULT
# `score` is the value returned by ereg.score(X_test, y_test) (R^2 for
# scikit-learn regressors); `y_pred` holds the ensemble predictions for X_test.
# Both names are shared with the earlier model cells, so run this cell right
# after the voting-regression cell.
print(score)
print(y_pred)

0.747412581057336
[16.         17.99999999 15.05850002 16.001      16.08       16.001
 16.         16.001     ]
