In [1]:
# Import required python libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [2]:
# Load data from excel file
data = pd.read_excel('laptop_details.xlsx')
data.shape

(300, 17)

In [3]:
# Dropping insignificant columns
data.drop('Company',axis=1, inplace=True)
data.drop('ThirdPL',axis=1, inplace=True)

In [4]:
# adding the target column to the end
data['sales_location'] = data['Sales_location']
data.drop('Sales_location',axis=1, inplace=True)
data.drop('Abbreviation', axis=1, inplace=True)

In [5]:
# calculates today's date
import datetime
date = (datetime.date.today() - datetime.timedelta(3*365/12)).isoformat()
date

'2018-08-13'

In [6]:
# Training data contains sold products whereas testing data contains unsold products
train_data = data[data.Status == 'sold']
test_data = data[data.Status == 'not sold']

In [7]:
# storing the stock_in_date and product columns
stock_in_date = test_data.Stock_in_date
product = test_data.Product
product.shape
stock_in_date.shape

(112,)

In [8]:
# Label encoding of categorical data to numerical data
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
for i in ([0,1,2,3,4,5,6,7,8]):
    array = le.fit_transform(np.array(train_data.iloc[:,i]))
    train_data.iloc[:,i] = array
for i in ([0,1,2,3,4,5,6,7,8]):
    array = le.fit_transform(np.array(test_data.iloc[:,i]))
    test_data.iloc[:,i] = array

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [9]:
# Mapping 'sold' category as 1 and 'not sold' category as 0
train_data.iloc[:,12] = train_data.iloc[:,12].map({'sold': 1, 'not sold': 0})
test_data.iloc[:,12] = test_data.iloc[:,12].map({'sold': 1, 'not sold': 0})

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [10]:
train_data.head()

Unnamed: 0,Product,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price_euros,Stock_in_date,Status,sales_location
0,0,2,4,5,15,1,2,27,2,3.21kg,2051.0,2018-11-01,1,New Delhi
1,0,2,4,5,30,1,9,28,2,3.49kg,2813.75,2018-09-04,1,West Bengal
3,0,2,4,3,22,1,9,31,2,3.21kg,1329.0,2018-08-24,1,New Delhi
4,1,2,5,9,29,1,9,27,2,4.42kg,2456.34,2018-09-01,1,Odisha
5,1,2,5,6,30,1,9,28,2,4.42kg,2799.0,2018-10-29,1,Tamil Nadu


In [11]:
# creating training and testing data to be trained and predicted respectively
X_train = train_data.iloc[:,:-1]
Y_train = train_data.iloc[:,-1]
X_test = test_data.iloc[:,:-1]
Y_test = test_data.iloc[:,-1]

In [12]:
array = le.fit_transform(np.array(Y_train))
Y_train = array
array = le.fit_transform(np.array(Y_test))
Y_test = array

Y_train

array([ 6, 12,  6,  7, 10, 10,  1,  2,  7,  6,  6,  0,  3,  6,  2, 10,  7,
        7,  1,  6,  5,  2,  8,  7,  0,  2,  1,  7,  2, 13,  6,  2,  1,  6,
        6,  7,  3,  6,  0,  0, 10,  8,  6,  6,  4,  8,  7,  3,  9,  4,  4,
        1,  0,  2, 10, 12, 10,  8, 10,  8,  8,  1,  8,  4,  6,  6, 10, 12,
        6, 12,  4,  7, 11,  6, 10,  6,  0, 11, 12,  6, 12, 11,  4,  6, 12,
        2,  1, 10,  1, 10, 12,  4,  6,  6,  7, 10, 11,  2, 11, 12,  1,  3,
        1,  6, 10,  8,  7, 10, 12,  8,  4,  3,  0,  6,  6,  6,  4, 11,  8,
        4,  0,  0,  7,  8,  3,  3,  1,  8,  0,  4,  4,  6,  7, 12,  2, 12,
        1, 12,  4,  3,  6,  0,  1,  7, 10,  7,  3,  2,  6,  6,  0,  1,  8,
       11,  1,  2,  6,  0,  4,  1,  3,  6,  6, 12, 12,  0,  7,  6,  4, 10,
        4, 12,  7,  6,  6,  2,  6,  1, 10,  0,  7,  7, 10, 10,  4,  6,  0,
        1], dtype=int64)

In [13]:
# Dropping the weight column
X_train.drop('Weight',axis=1, inplace=True)
X_test.drop('Weight', axis=1, inplace=True)

In [14]:
# Converting string values in Price_euros column to float values
X_train.Price_euros = X_train.Price_euros.astype(float)
X_test.Price_euros = X_test.Price_euros.astype(float)

In [15]:
X_train.shape

(188, 12)

In [16]:
X_test.shape

(112, 12)

In [17]:
# store stock_in_date
stock_in_date = X_test['Stock_in_date']
X_train.drop('Stock_in_date',axis=1,inplace=True)
X_test.drop('Stock_in_date',axis=1,inplace=True)

In [18]:
# Normalization the data to prevent overfitting
from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)
X_train
X_test

array([[-1.3522906 , -1.64991237,  1.31346639, ..., -2.21089828,
         2.15569009, -1.        ],
       [-1.30001204, -1.64991237,  2.26320363, ..., -2.21089828,
         2.48948102, -1.        ],
       [-1.30001204, -1.64991237,  2.26320363, ..., -2.21089828,
         2.13238051, -1.        ],
       ...,
       [ 0.73885176, -0.99689442,  1.31346639, ..., -2.21089828,
         0.92835594, -1.        ],
       [ 0.73885176, -0.99689442,  1.31346639, ..., -2.21089828,
         0.71810718, -1.        ],
       [ 0.79113032, -0.34387647,  1.31346639, ..., -2.21089828,
         0.69043844, -1.        ]])

In [19]:
# Using random forest classifier to predict the appropriate warehouse in appropriate locations for moving aged products
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
clf1 = RandomForestClassifier()
clf1.fit(X_train, Y_train)
Y_pred = clf1.predict(X_test)

In [20]:
# Storing the predictions into a csv file
prediction = pd.DataFrame(product, columns=['Product'])
prediction['Stock_in_date'] = stock_in_date
prediction['Location'] = le.inverse_transform(Y_pred)

  if diff:


In [21]:
prediction.to_csv('Predict2.csv')