# Machine Learning Documentation

## Load Data

In [1]:
#import relevant packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
from datetime import datetime
from datetime import date
from sklearn import linear_model
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LinearRegression

In [2]:
#loading data
df = pd.read_csv('MockDataV2.csv')
df.head()

Unnamed: 0,Index,TempIndex,ItemNumber,StationeryType,StationeryName,ReorderLevel,ReorderQty,UOM,Price,SupplierName,Dates,Time,RequisitionQuantity,TotalPrice
0,1,39,H032,Puncher,Hole Puncher 3 holes,50,$20.00,Each,12.6,CHEAP,"Monday, January 2, 2017",10:09:45,144,"$1,814.40"
1,2,19,E032,Exercise,Exercise Book A4 Hardcover 100 pg,100,$50.00,Each,0.21,WOW,"Monday, January 2, 2017",9:00:26,185,$38.85
2,3,90,T101,Tray,Trays White InOut,20,$10.00,Set,6.0,APLHA,"Monday, January 2, 2017",13:02:20,75,$450.00
3,4,78,S022,Stapler,Stapler No. 28,50,$20.00,Box,3.6,APLHA,"Monday, January 2, 2017",17:04:33,112,$403.20
4,5,55,P035,Pen,Pen Felt Tip Red,100,$50.00,Dozen,2.5,LOTCASH,"Monday, January 2, 2017",13:58:04,415,"$1,037.50"


In [3]:
#checking datatypes
df.dtypes

Index                    int64
TempIndex                int64
ItemNumber              object
StationeryType          object
StationeryName          object
ReorderLevel             int64
ReorderQty              object
UOM                     object
Price                  float64
SupplierName            object
Dates                   object
Time                    object
RequisitionQuantity      int64
TotalPrice              object
dtype: object

### Data Pre-processing

In [4]:
#adding a new column(TotalPrice_new) for converting TotalPrice into datatype float64.
df['TotalPrice_new'] = df['TotalPrice'].apply(lambda x: x.replace('$','')).apply(lambda x: x.replace(',','')).astype(np.float64)
df

Unnamed: 0,Index,TempIndex,ItemNumber,StationeryType,StationeryName,ReorderLevel,ReorderQty,UOM,Price,SupplierName,Dates,Time,RequisitionQuantity,TotalPrice,TotalPrice_new
0,1,39,H032,Puncher,Hole Puncher 3 holes,50,$20.00,Each,12.60,CHEAP,"Monday, January 2, 2017",10:09:45,144,"$1,814.40",1814.40
1,2,19,E032,Exercise,Exercise Book A4 Hardcover 100 pg,100,$50.00,Each,0.21,WOW,"Monday, January 2, 2017",9:00:26,185,$38.85,38.85
2,3,90,T101,Tray,Trays White InOut,20,$10.00,Set,6.00,APLHA,"Monday, January 2, 2017",13:02:20,75,$450.00,450.00
3,4,78,S022,Stapler,Stapler No. 28,50,$20.00,Box,3.60,APLHA,"Monday, January 2, 2017",17:04:33,112,$403.20,403.20
4,5,55,P035,Pen,Pen Felt Tip Red,100,$50.00,Dozen,2.50,LOTCASH,"Monday, January 2, 2017",13:58:04,415,"$1,037.50",1037.50
5,6,27,F023,File,File-Brown wo Logo,200,$150.00,Each,2.10,APLHA,"Monday, January 2, 2017",15:00:12,1440,"$3,024.00",3024.00
6,7,68,R001,Ruler,Ruler 6inch,50,$20.00,Dozen,2.50,APLHA,"Monday, January 2, 2017",14:52:04,48,$120.00,120.00
7,8,21,E034,Exercise,Exercise Book A4 Hardcover 200 pg,100,$50.00,Each,0.27,BANES,"Monday, January 2, 2017",16:02:09,300,$81.00,81.00
8,9,53,P033,Pen,Pen Felt Tip Black,100,$50.00,Dozen,2.50,RANDOM,"Monday, January 2, 2017",11:11:35,320,$800.00,800.00
9,10,18,E031,Exercise,Exercise Book 120 pg,100,$50.00,Each,0.26,BANES,"Monday, January 2, 2017",10:30:00,485,$126.10,126.10


In [5]:
#extract the necessary columns
df_extracted = df.iloc[:,[1,2,3,4,9,10,12,13,14]]
df_extracted.head()

Unnamed: 0,TempIndex,ItemNumber,StationeryType,StationeryName,SupplierName,Dates,RequisitionQuantity,TotalPrice,TotalPrice_new
0,39,H032,Puncher,Hole Puncher 3 holes,CHEAP,"Monday, January 2, 2017",144,"$1,814.40",1814.4
1,19,E032,Exercise,Exercise Book A4 Hardcover 100 pg,WOW,"Monday, January 2, 2017",185,$38.85,38.85
2,90,T101,Tray,Trays White InOut,APLHA,"Monday, January 2, 2017",75,$450.00,450.0
3,78,S022,Stapler,Stapler No. 28,APLHA,"Monday, January 2, 2017",112,$403.20,403.2
4,55,P035,Pen,Pen Felt Tip Red,LOTCASH,"Monday, January 2, 2017",415,"$1,037.50",1037.5


In [6]:
df_extracted['Dates'] = df_extracted['Dates'].astype('datetime64[ns]')
df_extracted

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,TempIndex,ItemNumber,StationeryType,StationeryName,SupplierName,Dates,RequisitionQuantity,TotalPrice,TotalPrice_new
0,39,H032,Puncher,Hole Puncher 3 holes,CHEAP,2017-01-02,144,"$1,814.40",1814.40
1,19,E032,Exercise,Exercise Book A4 Hardcover 100 pg,WOW,2017-01-02,185,$38.85,38.85
2,90,T101,Tray,Trays White InOut,APLHA,2017-01-02,75,$450.00,450.00
3,78,S022,Stapler,Stapler No. 28,APLHA,2017-01-02,112,$403.20,403.20
4,55,P035,Pen,Pen Felt Tip Red,LOTCASH,2017-01-02,415,"$1,037.50",1037.50
5,27,F023,File,File-Brown wo Logo,APLHA,2017-01-02,1440,"$3,024.00",3024.00
6,68,R001,Ruler,Ruler 6inch,APLHA,2017-01-02,48,$120.00,120.00
7,21,E034,Exercise,Exercise Book A4 Hardcover 200 pg,BANES,2017-01-02,300,$81.00,81.00
8,53,P033,Pen,Pen Felt Tip Black,RANDOM,2017-01-02,320,$800.00,800.00
9,18,E031,Exercise,Exercise Book 120 pg,BANES,2017-01-02,485,$126.10,126.10


In [7]:
df_extracted.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42000 entries, 0 to 41999
Data columns (total 9 columns):
TempIndex              42000 non-null int64
ItemNumber             42000 non-null object
StationeryType         42000 non-null object
StationeryName         42000 non-null object
SupplierName           42000 non-null object
Dates                  42000 non-null datetime64[ns]
RequisitionQuantity    42000 non-null int64
TotalPrice             42000 non-null object
TotalPrice_new         42000 non-null float64
dtypes: datetime64[ns](1), float64(1), int64(2), object(5)
memory usage: 2.9+ MB


In [8]:
#list down all the stationery types
TypeArray = df_extracted.StationeryType.unique()
TypeArray

#replace stationery type name with an integer 
def Trans_StationeryType(x):
    for y in range(0,len(TypeArray)):
        if x == TypeArray[y]:
            return y+1

#adding a new column for the stationery type in terms of integer
df_extracted['TransStationeryType'] = df_extracted['StationeryType'].apply(Trans_StationeryType)
df_extracted.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if sys.path[0] == '':


Unnamed: 0,TempIndex,ItemNumber,StationeryType,StationeryName,SupplierName,Dates,RequisitionQuantity,TotalPrice,TotalPrice_new,TransStationeryType
0,39,H032,Puncher,Hole Puncher 3 holes,CHEAP,2017-01-02,144,"$1,814.40",1814.4,1
1,19,E032,Exercise,Exercise Book A4 Hardcover 100 pg,WOW,2017-01-02,185,$38.85,38.85,2
2,90,T101,Tray,Trays White InOut,APLHA,2017-01-02,75,$450.00,450.0,3
3,78,S022,Stapler,Stapler No. 28,APLHA,2017-01-02,112,$403.20,403.2,4
4,55,P035,Pen,Pen Felt Tip Red,LOTCASH,2017-01-02,415,"$1,037.50",1037.5,5


In [9]:
#groupby Stationery Name & Type & Supplier and sum the quantity and total price
df_new = df_extracted.groupby(['StationeryName','StationeryType','SupplierName']).agg({'RequisitionQuantity':'sum','TotalPrice_new':'sum'}) 
df_new

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,RequisitionQuantity,TotalPrice_new
StationeryName,StationeryType,SupplierName,Unnamed: 3_level_1,Unnamed: 4_level_1
Clips Double 1inch,Clip,FIEND,76173,76173.00
Clips Double 2inch,Clip,BANES,80574,96688.80
Clips Double 34inch,Clip,RANDOM,77469,69722.10
Clips Paper Large,Clip,APLHA,77700,155400.00
Clips Paper Medium,Clip,APLHA,81249,121873.50
Clips Paper Small,Clip,BANES,77007,92408.40
Envelope Brown 3inchx6inch,Envelope,ISS,986360,986360.00
Envelope Brown 3inchx6inch w Window,Envelope,OMEGA,961360,1057496.00
Envelope Brown 5inchx7inch,Envelope,LOTCASH,1067880,1708608.00
Envelope Brown 5inchx7inch w Window,Envelope,ISS,993800,1689460.00


In [10]:
df_extracted.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42000 entries, 0 to 41999
Data columns (total 10 columns):
TempIndex              42000 non-null int64
ItemNumber             42000 non-null object
StationeryType         42000 non-null object
StationeryName         42000 non-null object
SupplierName           42000 non-null object
Dates                  42000 non-null datetime64[ns]
RequisitionQuantity    42000 non-null int64
TotalPrice             42000 non-null object
TotalPrice_new         42000 non-null float64
TransStationeryType    42000 non-null int64
dtypes: datetime64[ns](1), float64(1), int64(3), object(5)
memory usage: 3.2+ MB


In [11]:
df_filterByValue = df_extracted[df_extracted['StationeryName'].str.match('Transparency Green')] 
df_filterByValue['Cumulative_RQ'] = df_filterByValue['RequisitionQuantity'].cumsum(axis=0) 
df_filterByValue['Dates_ordinal'] = df_filterByValue['Dates'].apply(lambda date: date.toordinal())
df_filterByValue

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,TempIndex,ItemNumber,StationeryType,StationeryName,SupplierName,Dates,RequisitionQuantity,TotalPrice,TotalPrice_new,TransStationeryType,Cumulative_RQ,Dates_ordinal
102,85,T022,Tparency,Transparency Green,LOTCASH,2017-01-03,860,"$1,204.00",1204.0,17,860,736332
130,85,T022,Tparency,Transparency Green,LOTCASH,2017-01-04,720,"$1,008.00",1008.0,17,1580,736333
134,85,T022,Tparency,Transparency Green,LOTCASH,2017-01-04,840,"$1,176.00",1176.0,17,2420,736333
166,85,T022,Tparency,Transparency Green,LOTCASH,2017-01-04,740,"$1,036.00",1036.0,17,3160,736333
233,85,T022,Tparency,Transparency Green,LOTCASH,2017-01-05,580,$812.00,812.0,17,3740,736334
342,85,T022,Tparency,Transparency Green,LOTCASH,2017-01-09,1440,"$2,016.00",2016.0,17,5180,736338
489,85,T022,Tparency,Transparency Green,LOTCASH,2017-01-11,900,"$1,260.00",1260.0,17,6080,736340
508,85,T022,Tparency,Transparency Green,LOTCASH,2017-01-12,360,$504.00,504.0,17,6440,736341
519,85,T022,Tparency,Transparency Green,LOTCASH,2017-01-12,980,"$1,372.00",1372.0,17,7420,736341
660,85,T022,Tparency,Transparency Green,LOTCASH,2017-01-16,1780,"$2,492.00",2492.0,17,9200,736345


In [12]:
itemId_array = df_extracted['TempIndex'].unique()
itemId_array

array([39, 19, 90, 78, 55, 27, 68, 21, 53, 18, 15, 80, 40, 89, 48,  6, 71,
       25,  5,  9, 79,  8, 70, 50, 16, 51, 65, 12, 22, 11, 72, 42, 41, 56,
       75, 57, 61, 49,  3, 36, 37, 66, 87, 44, 74, 43, 47, 73, 59, 54, 64,
        7, 86, 60, 52, 63, 67, 46, 35, 13,  2, 10, 26, 14, 58, 85, 17,  1,
       38, 45, 76, 20, 82,  4, 69, 33, 81, 84, 30, 88, 29, 31, 62, 77, 83,
       34, 32, 23, 24, 28], dtype=int64)

## LR Model Training

In [13]:
class LRTraining:
    def __init__(self, InputName, InputNameForGraph, InputNameForScore, InputNameForRQ, InputNameForRD,InputYear,InputMonth,InputDay):
        self.InputName = InputName
        self.InputNameForGraph = InputNameForGraph
        self.InputNameForScore = InputNameForScore
        self.InputNameForRQ = InputNameForRQ
        self.InputNameForRD = InputNameForRD
        self.InputYear = InputYear
        self.InputMonth = InputMonth
        self.InputDay = InputDay
    
    def SelectByName(InputName):
        df_filterByValue = df_extracted[df_extracted['StationeryName'].str.match(InputName)] #display item with selected name
        df_filterByValue['Cumulative_RQ'] = df_filterByValue['RequisitionQuantity'].cumsum(axis=0) #add a cumulative requisition quantity column
        df_filterByValue['Dates_ordinal'] = df_filterByValue['Dates'].apply(lambda date: date.toordinal()) #convert Dates from object to datetime
        return df_filterByValue
    
    def LinearRegressionGraph(InputNameForGraph):
        df_filterByValue = df_extracted[df_extracted['StationeryName'].str.match(InputNameForGraph)] 
        df_filterByValue['Cumulative_RQ'] = df_filterByValue['RequisitionQuantity'].cumsum(axis=0) 
        df_filterByValue['Dates_ordinal'] = df_filterByValue['Dates'].apply(lambda date: date.toordinal()) 
        ax = sns.regplot(x='Dates_ordinal', y = 'Cumulative_RQ', data = df_filterByValue)
        ax.set_xlim(df_filterByValue['Dates_ordinal'].min() - 1, df_filterByValue['Dates_ordinal'].max() + 1)
        ax.set_ylim(0, df_filterByValue['Cumulative_RQ'].max() + 1)
        ax.set_xlabel('Time')
        ax.set_ylabel('Requisition Quantity')
        new_labels = [date.fromordinal(int(item)) for item in ax.get_xticks()]
        ax.set_xticklabels(new_labels)
        plt.xticks(rotation=45)
        return ax
    
    def LinearRegressionScore(InputNameForScore):
        df_filterByValue = df_extracted[df_extracted['StationeryName'].str.match(InputNameForScore)] 
        df_filterByValue['Cumulative_RQ'] = df_filterByValue['RequisitionQuantity'].cumsum(axis=0) 
        df_filterByValue['Dates_ordinal'] = df_filterByValue['Dates'].apply(lambda date: date.toordinal()) 
        X = df_filterByValue[['Dates_ordinal']]
        y = df_filterByValue['Cumulative_RQ']
        X_train, X_test, y_train, y_test = train_test_split(X,y, random_state = 0)
        linReg = LinearRegression()
        linReg.fit(X_train, y_train)
        y_prep = linReg.predict(X_test)
        the_score = r2_score(y_test, y_prep)
        return the_score
    
    def EstimateRequisitionQuantity(InputYear,InputMonth,InputDay,InputNameForRQ):
        df_filterByValue = df_extracted[df_extracted['StationeryName'].str.match(InputNameForRQ)] 
        df_filterByValue['Cumulative_RQ'] = df_filterByValue['RequisitionQuantity'].cumsum(axis=0) 
        df_filterByValue['Dates_ordinal'] = df_filterByValue['Dates'].apply(lambda date: date.toordinal()) 
        input_date = date(InputYear,InputMonth,InputDay)
        input_date_ordinal = input_date.toordinal()
        X = df_filterByValue[['Dates_ordinal']]
        y = df_filterByValue['Cumulative_RQ']
        X_train, X_test, y_train, y_test = train_test_split(X,y, random_state = 0)
        linReg = LinearRegression()
        linReg.fit(X_train, y_train)
        requisition_quantity_prediction = linReg.predict([[input_date_ordinal]])
        quantity_difference = requisition_quantity_prediction - df_filterByValue.iloc[-1,8]
        requisition_msg_1 = 'May not receive ANY requisition by this date!'
        requisition_msg_2 = 'Estimated requisition quantity: '
        if(quantity_difference > 0):
            return requisition_msg_2 + str(int(quantity_difference)) + ' original quantity: ' + str(df_filterByValue.iloc[-1,8]) + ' Predicted quantity: ' + str(requisition_quantity_prediction)
        else:
            return requisition_msg_1
        
    def NextRequisitionDay(InputNameForRD):
        df_filterByValue = df_extracted[df_extracted['StationeryName'].str.match(InputNameForRD)] 
        df_filterByValue['Cumulative_RQ'] = df_filterByValue['RequisitionQuantity'].cumsum(axis=0) 
        df_filterByValue['Dates_ordinal'] = df_filterByValue['Dates'].apply(lambda date: date.toordinal()) 
        latest_requisition_quantity = df_filterByValue.iloc[-1,8]
        latest_requisition_date = df_filterByValue.iloc[-1,9]
        estimate_next_requisition = latest_requisition_date
        X = df_filterByValue[['Dates_ordinal']]
        y = df_filterByValue['Cumulative_RQ']
        X_train, X_test, y_train, y_test = train_test_split(X,y, random_state = 0)
        linReg = LinearRegression()
        linReg.fit(X_train, y_train)
        current_requisition_quantity = linReg.predict([[estimate_next_requisition]])
        if(current_requisition_quantity <= latest_requisition_quantity):
            while(current_requisition_quantity <= latest_requisition_quantity):
                estimate_next_requisition = estimate_next_requisition +1
                current_requisition_quantity = linReg.predict([[estimate_next_requisition]])
            return date.fromordinal(estimate_next_requisition)
        else: 
            return 'You should receive requisition by Now!'

    def EstimateAllRequisitionQuantity(InputYear,InputMonth,InputDay):
        itemId_array = df_extracted['TempIndex'].unique()
        itemCode_array = df_extracted['ItemNumber'].unique()
        itemSelectDateRQ_list = []
        itemOneMonthRQ_list = []
        itemDate_list = []
        itemMonthEndDate_list = []
        itemSelectedDate_list =[]
        for item in itemCode_array:
            df_filterByValue = df_extracted[df_extracted['ItemNumber'].str.match(item)] 
            df_filterByValue['Cumulative_RQ'] = df_filterByValue['RequisitionQuantity'].cumsum(axis=0) 
            df_filterByValue['Dates_ordinal'] = df_filterByValue['Dates'].apply(lambda date: date.toordinal()) 
            input_date = date(InputYear,InputMonth,InputDay)
            if(InputMonth == 4) or (InputMonth == 6) or (InputMonth == 9) or (InputMonth == 11):
                input_date_ordinal = input_date.toordinal()
                final_firstdayofmonth = date(InputYear,InputMonth,1)
                final_firstdayofmonth_ordinal = final_firstdayofmonth.toordinal()
                final_lastdayofmonth = date(InputYear,InputMonth,30)
                final_lastdayofmonth_ordinal = final_lastdayofmonth.toordinal()
                X = df_filterByValue[['Dates_ordinal']]
                y = df_filterByValue['Cumulative_RQ']
                X_train, X_test, y_train, y_test = train_test_split(X,y, random_state = 0)
                linReg = LinearRegression()
                linReg.fit(X_train, y_train)
                requisition_quantity_prediction = linReg.predict([[input_date_ordinal]])
                requisition_quantity_prediction_on_firstdayofmonth = linReg.predict([[final_firstdayofmonth_ordinal]])
                requisition_quantity_prediction_on_lastdayofmonth = linReg.predict([[final_lastdayofmonth_ordinal]])
#                 quantity_difference = requisition_quantity_prediction - df_filterByValue.iloc[-1,8]
#                 one_Month_RQprediction = linReg.predict([[df_filterByValue.iloc[-1,9]+29]])
#                 one_Month_RQ_difference = one_Month_RQprediction - df_filterByValue.iloc[-1,8]
                one_Month_RQ_difference = requisition_quantity_prediction_on_lastdayofmonth - requisition_quantity_prediction_on_firstdayofmonth
                requisition_msg_1 = '0'

#                 if(quantity_difference > 0):
#                     itemSelectDateRQ_list.append(str(int(quantity_difference)))
#                 else:
#                     itemSelectDateRQ_list.append(requisition_msg_1)
                if(one_Month_RQ_difference > 0):
                    itemOneMonthRQ_list.append(str(int(one_Month_RQ_difference)))
                else:
                    itemOneMonthRQ_list.append(requisition_msg_1)
                itemDate_list.append(str(final_firstdayofmonth))
                itemMonthEndDate_list.append(str(final_lastdayofmonth))
                itemSelectedDate_list.append(str(date.fromordinal(input_date_ordinal)))
            elif(InputMonth == 1) or (InputMonth == 3) or (InputMonth == 5) or (InputMonth == 7) or (InputMonth == 8) or (InputMonth == 10) or (InputMonth == 12):
                input_date_ordinal = input_date.toordinal()
                final_firstdayofmonth = date(InputYear,InputMonth,1)
                final_firstdayofmonth_ordinal = final_firstdayofmonth.toordinal()
                final_lastdayofmonth = date(InputYear,InputMonth,31)
                final_lastdayofmonth_ordinal = final_lastdayofmonth.toordinal()
                X = df_filterByValue[['Dates_ordinal']]
                y = df_filterByValue['Cumulative_RQ']
                X_train, X_test, y_train, y_test = train_test_split(X,y, random_state = 0)
                linReg = LinearRegression()
                linReg.fit(X_train, y_train)
                requisition_quantity_prediction = linReg.predict([[input_date_ordinal]])
                requisition_quantity_prediction_on_firstdayofmonth = linReg.predict([[final_firstdayofmonth_ordinal]])
                requisition_quantity_prediction_on_lastdayofmonth = linReg.predict([[final_lastdayofmonth_ordinal]])
#                 quantity_difference = requisition_quantity_prediction - df_filterByValue.iloc[-1,8]
#                 one_Month_RQprediction = linReg.predict([[df_filterByValue.iloc[-1,9]+30]])
#                 one_Month_RQ_difference = one_Month_RQprediction - df_filterByValue.iloc[-1,8]
                one_Month_RQ_difference = requisition_quantity_prediction_on_lastdayofmonth - requisition_quantity_prediction_on_firstdayofmonth
                requisition_msg_1 = '0'

#                 if(quantity_difference > 0):
#                     itemSelectDateRQ_list.append(str(int(quantity_difference)))
#                 else:
#                     itemSelectDateRQ_list.append(requisition_msg_1)
                if(one_Month_RQ_difference > 0):
                    itemOneMonthRQ_list.append(str(int(one_Month_RQ_difference)))
                else:
                    itemOneMonthRQ_list.append(requisition_msg_1)
                itemDate_list.append(str(final_firstdayofmonth))
                itemMonthEndDate_list.append(str(final_lastdayofmonth))
                itemSelectedDate_list.append(str(date.fromordinal(input_date_ordinal)))
            else:
                if(InputYear % 4 == 0):
                    input_date_ordinal = input_date.toordinal()
                    final_firstdayofmonth = date(InputYear,InputMonth,1)
                    final_firstdayofmonth_ordinal = final_firstdayofmonth.toordinal()
                    final_lastdayofmonth = date(InputYear,InputMonth,29)
                    final_lastdayofmonth_ordinal = final_lastdayofmonth.toordinal()
                    X = df_filterByValue[['Dates_ordinal']]
                    y = df_filterByValue['Cumulative_RQ']
                    X_train, X_test, y_train, y_test = train_test_split(X,y, random_state = 0)
                    linReg = LinearRegression()
                    linReg.fit(X_train, y_train)
                    requisition_quantity_prediction = linReg.predict([[input_date_ordinal]])
                    requisition_quantity_prediction_on_firstdayofmonth = linReg.predict([[final_firstdayofmonth_ordinal]])
                    requisition_quantity_prediction_on_lastdayofmonth = linReg.predict([[final_lastdayofmonth_ordinal]])
#                     quantity_difference = requisition_quantity_prediction - df_filterByValue.iloc[-1,8]
#                     one_Month_RQprediction = linReg.predict([[df_filterByValue.iloc[-1,9]+28]])
#                     one_Month_RQ_difference = one_Month_RQprediction - df_filterByValue.iloc[-1,8]
                    one_Month_RQ_difference = requisition_quantity_prediction_on_lastdayofmonth - requisition_quantity_prediction_on_firstdayofmonth
                    requisition_msg_1 = '0'

#                     if(quantity_difference > 0):
#                         itemSelectDateRQ_list.append(str(int(quantity_difference)))
#                     else:
#                         itemSelectDateRQ_list.append(requisition_msg_1)
                    if(one_Month_RQ_difference > 0):
                        itemOneMonthRQ_list.append(str(int(one_Month_RQ_difference)))
                    else:
                        itemOneMonthRQ_list.append(requisition_msg_1)
                    itemDate_list.append(str(final_firstdayofmonth))
                    itemMonthEndDate_list.append(str(final_lastdayofmonth))
                    itemSelectedDate_list.append(str(date.fromordinal(input_date_ordinal)))
                else:
                    input_date_ordinal = input_date.toordinal()
                    final_firstdayofmonth = date(InputYear,InputMonth,1)
                    final_firstdayofmonth_ordinal = final_firstdayofmonth.toordinal()
                    final_lastdayofmonth = date(InputYear,InputMonth,28)
                    final_lastdayofmonth_ordinal = final_lastdayofmonth.toordinal()
                    X = df_filterByValue[['Dates_ordinal']]
                    y = df_filterByValue['Cumulative_RQ']
                    X_train, X_test, y_train, y_test = train_test_split(X,y, random_state = 0)
                    linReg = LinearRegression()
                    linReg.fit(X_train, y_train)
                    requisition_quantity_prediction = linReg.predict([[input_date_ordinal]])
                    requisition_quantity_prediction_on_firstdayofmonth = linReg.predict([[final_firstdayofmonth_ordinal]])
                    requisition_quantity_prediction_on_lastdayofmonth = linReg.predict([[final_lastdayofmonth_ordinal]])
#                     quantity_difference = requisition_quantity_prediction - df_filterByValue.iloc[-1,8]
#                     one_Month_RQprediction = linReg.predict([[df_filterByValue.iloc[-1,9]+27]])
#                     one_Month_RQ_difference = one_Month_RQprediction - df_filterByValue.iloc[-1,8]
                    one_Month_RQ_difference = requisition_quantity_prediction_on_lastdayofmonth - requisition_quantity_prediction_on_firstdayofmonth
                    requisition_msg_1 = '0'

#                     if(quantity_difference > 0):
#                         itemSelectDateRQ_list.append(str(int(quantity_difference)))
#                     else:
#                         itemSelectDateRQ_list.append(requisition_msg_1)
                    if(one_Month_RQ_difference > 0):
                        itemOneMonthRQ_list.append(str(int(one_Month_RQ_difference)))
                    else:
                        itemOneMonthRQ_list.append(requisition_msg_1)
                    itemDate_list.append(str(final_firstdayofmonth))
                    itemMonthEndDate_list.append(str(final_lastdayofmonth))
                    itemSelectedDate_list.append(str(date.fromordinal(input_date_ordinal)))
            
        itemRQ_array = np.asarray(itemSelectDateRQ_list)
        itemOneMonthRQ_array = np.asarray(itemOneMonthRQ_list)
        itemDate_array = np.asarray(itemDate_list)
        itemMonthEndDate_array = np.asarray(itemMonthEndDate_list)
        itemSelectedDate_array = np.asarray(itemSelectedDate_list)
        df_AllRQ = pd.DataFrame({'Id':itemId_array,'Reorder level and quantity': itemOneMonthRQ_array})
 #       df_AllRQ['Reorder level and quantity'] = itemOneMonthRQ_array
#        df_AllRQ['Last day of the Month'] = itemMonthEndDate_array
#        df_AllRQ['1st Day of the Moneth'] = itemDate_array
#        df_AllRQ['Estimated One Day Requisition Quantity'] = itemOneMonthRQ_array
#         df_AllRQ['Selected Requisition day'] = itemSelectedDate_array
#         df_AllRQ['Estimated Requisition Quantity'] = itemRQ_array
        return df_AllRQ

In [14]:
LRTraining.EstimateAllRequisitionQuantity(2019,9,16)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is tryin

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


Unnamed: 0,Id,Reorder level and quantity
0,39,1592
1,19,4139
2,90,709
3,78,1534
4,55,4177
5,27,11906
6,68,1628
7,21,3918
8,53,3964
9,18,3894


In [15]:
LRTraining.EstimateRequisitionQuantity(2019,8,15,'Envelope Brown 5inchx7inch')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


'Estimated requisition quantity: 2093668 original quantity: 952.0 Predicted quantity: [2094620.62037635]'

In [16]:
LRT = LRTraining
pickle.dump(LRT, open('LogicDataOne.pkl', 'wb'))

In [17]:
'Most Recent Requisition Date': itemDate_array, 'One Day Prediction': itemOneDayRQ_array, 'Selected Requisition day': itemSelectedDate_array,

SyntaxError: invalid syntax (<ipython-input-17-463bc0fe503f>, line 1)

In [None]:
'Estimated Requisition Quantity': itemRQ_array