In [1]:
import pandas as pd
import numpy as np
import random
from sklearn.tree import DecisionTreeClassifier

In [2]:
# Location of the labelled CSV; change this single constant to point at a local copy.
TRAIN_CSV_PATH = "E:\\DataSets\\Price\\train.csv"
# Number of leading rows held out for evaluation; the remaining rows are used for training.
N_TEST_ROWS = 200

df = pd.read_csv(TRAIN_CSV_PATH)
test_df = df.iloc[:N_TEST_ROWS]
train_df = df.iloc[N_TEST_ROWS:]

In [3]:
# Preview the training partition (every row after the first 200 of the CSV).
train_df

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
200,1703,1,0.5,0,0,1,22,0.6,192,1,...,286,1235,1046,5,4,13,1,0,1,0
201,1445,1,2.4,1,7,1,20,0.4,173,7,...,1273,1345,1441,7,4,13,1,1,1,1
202,1087,0,1.3,0,0,1,16,0.3,166,3,...,295,589,690,14,6,6,1,0,1,0
203,671,0,0.9,1,10,0,30,0.7,105,7,...,852,1182,2504,6,4,15,1,0,0,2
204,1472,1,2.3,0,6,0,61,0.1,168,6,...,710,1052,2677,19,15,8,1,0,0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,794,1,0.5,1,0,1,2,0.8,106,6,...,1222,1890,668,13,4,19,1,1,0,0
1996,1965,1,2.6,1,0,0,39,0.2,187,4,...,915,1965,2032,11,10,16,1,1,1,2
1997,1911,0,0.9,1,1,1,36,0.7,108,8,...,868,1632,3057,9,1,5,1,1,0,3
1998,1512,0,0.9,0,4,1,46,0.1,145,5,...,336,670,869,18,10,19,1,1,1,0


In [4]:
# Preview the held-out partition (the first 200 rows of the CSV).
test_df

Unnamed: 0,battery_power,blue,clock_speed,dual_sim,fc,four_g,int_memory,m_dep,mobile_wt,n_cores,...,px_height,px_width,ram,sc_h,sc_w,talk_time,three_g,touch_screen,wifi,price_range
0,842,0,2.2,0,1,0,7,0.6,188,2,...,20,756,2549,9,7,19,0,0,1,1
1,1021,1,0.5,1,0,1,53,0.7,136,3,...,905,1988,2631,17,3,7,1,1,0,2
2,563,1,0.5,1,2,1,41,0.9,145,5,...,1263,1716,2603,11,2,9,1,1,0,2
3,615,1,2.5,0,0,0,10,0.8,131,6,...,1216,1786,2769,16,8,11,1,0,0,2
4,1821,1,1.2,0,13,1,44,0.6,141,2,...,1208,1212,1411,8,2,15,1,1,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,1526,0,2.1,0,1,1,23,0.2,117,7,...,718,751,2227,18,10,3,1,1,0,2
196,1989,0,2.5,1,0,1,41,0.8,94,3,...,1100,1497,1665,17,9,12,1,1,1,2
197,1308,0,1.9,0,0,1,61,0.7,106,3,...,59,1215,3355,15,2,4,1,0,1,3
198,609,0,0.5,0,3,0,26,0.3,93,4,...,938,1948,1866,11,10,14,1,1,1,1


In [5]:
# Collect the distinct target labels that occur in the training rows.
set(train_df["price_range"].tolist())

{0, 1, 2, 3}

# Making our own model

In [6]:
def randomSplitting(df, n_splits):
    """Return the rows of ``df`` in a uniformly random order.

    Despite its name, this function only shuffles; the caller is responsible
    for slicing the shuffled rows into smaller pieces.

    Parameters
    ----------
    df : array-like
        Data whose first axis indexes the rows (a DataFrame, a 2-D array, ...).
        It is converted with ``np.asarray`` and is not modified.
    n_splits : int
        Accepted for interface compatibility only; it does not influence the
        result.

    Returns
    -------
    numpy.ndarray
        A new array with the same shape and the same rows as ``df``, in random
        order. Empty input keeps its shape (e.g. ``(0, n_cols)``).
    """
    rows = np.asarray(df)
    # A random permutation of the row positions visits every row exactly once.
    shuffled_order = np.random.permutation(len(rows))
    # Fancy indexing builds the shuffled copy in one vectorised step.
    return rows[shuffled_order]

In [7]:
def get_trees(df, n_splits):
    """Fit one decision tree per disjoint random chunk of ``df``.

    The rows are shuffled with ``randomSplitting`` and divided into
    ``n_splits`` chunks of nearly equal size. A fresh
    ``DecisionTreeClassifier`` is fitted on each chunk, using every column
    except the last as features and the last column as the label.

    Parameters
    ----------
    df : array-like
        2-D data (DataFrame or array) whose last column holds the labels.
    n_splits : int
        Number of chunks, and therefore number of trees, to build.

    Returns
    -------
    list
        The fitted ``DecisionTreeClassifier`` objects, one per chunk.

    Raises
    ------
    ValueError
        If ``n_splits`` is smaller than 1 or larger than the number of rows
        (which would leave at least one tree with nothing to fit).
    """
    df = np.array(df)
    if n_splits < 1:
        raise ValueError("n_splits must be a positive integer")
    if len(df) < n_splits:
        raise ValueError("n_splits cannot exceed the number of rows in df")

    split_df = randomSplitting(df, n_splits)
    trees = []
    # np.array_split spreads the remainder over the first chunks, so no row is
    # dropped when len(df) is not an exact multiple of n_splits.
    for chunk in np.array_split(split_df, n_splits):
        tree = DecisionTreeClassifier()
        tree.fit(chunk[:, :-1], chunk[:, -1])
        trees.append(tree)
    return trees

In [8]:
def get_prediction(trees, x):
    """Return the majority-vote label of ``trees`` for a single sample ``x``.

    Parameters
    ----------
    trees : iterable
        Fitted estimators exposing ``predict(rows)``, which returns one label
        per row.
    x : sequence
        Feature values of one sample.

    Returns
    -------
    int
        The label predicted by the largest number of trees. When several
        labels receive the same number of votes, the smallest one wins.

    Raises
    ------
    ValueError
        If ``trees`` is empty.
    """
    # predict() returns one label per row; take the single element explicitly
    # rather than calling int() on a 1-element array (deprecated in NumPy 1.25).
    votes = [int(tree.predict([x])[0]) for tree in trees]
    if not votes:
        raise ValueError("get_prediction needs at least one tree")
    # max() keeps the first maximal candidate, so iterating the labels in
    # ascending order makes the tie-break deterministic.
    return max(sorted(set(votes)), key=votes.count)

In [9]:
# Size of the hand-built ensemble; the training rows are cut into this many chunks.
n_trees = 25
forest_trees = get_trees(df=train_df, n_splits=n_trees)

In [10]:
# Convert the held-out frame once, then separate the feature columns from the
# trailing label column.
test_values = np.array(test_df)
x_test, y_test = test_values[:, :-1], test_values[:, -1]

In [11]:
# Majority-vote label from the hand-built ensemble for every held-out sample.
predictions = [get_prediction(forest_trees, sample) for sample in x_test]

In [12]:
def get_acc(a, b):
    """Return the fraction of positions at which ``a`` and ``b`` are equal.

    Parameters
    ----------
    a, b : sequence
        Equally long sequences (lists, 1-D arrays, ...) compared element by
        element with ``==``.

    Returns
    -------
    float
        Number of matching positions divided by the sequence length.

    Raises
    ------
    ValueError
        If the sequences differ in length, or if they are empty (the accuracy
        of nothing is undefined).
    """
    if len(a) != len(b):
        raise ValueError(
            "get_acc needs sequences of equal length, got %d and %d" % (len(a), len(b))
        )
    if len(a) == 0:
        raise ValueError("get_acc cannot score empty sequences")
    # Count with plain 1s so the result stays a built-in float even when the
    # elements are NumPy scalars.
    matches = sum(1 for left, right in zip(a, b) if left == right)
    return matches / len(a)

In [13]:
# Fraction of held-out rows where the hand-built ensemble's vote equals the true label.
get_acc(predictions, y_test)

0.795

# Using the sklearn model

In [14]:
from sklearn.ensemble import RandomForestClassifier

In [15]:
# Fit scikit-learn's random forest with the same number of trees as the
# hand-built ensemble. The features are passed as a plain NumPy array because
# the later cells predict on the NumPy array `x_test`; fitting on a DataFrame
# would record column names and make those predict calls warn that X does not
# have valid feature names.
model = RandomForestClassifier(n_estimators=n_trees)
model.fit(train_df.iloc[:, :-1].to_numpy(), train_df.iloc[:, -1])

RandomForestClassifier(n_estimators=25)

In [16]:
# Class predicted by the scikit-learn forest for each held-out row.
model.predict(x_test)

array([2, 3, 2, 2, 1, 1, 3, 0, 0, 0, 2, 3, 1, 2, 0, 0, 3, 2, 1, 1, 2, 3,
       1, 0, 2, 2, 2, 3, 1, 0, 3, 0, 1, 3, 0, 0, 1, 3, 2, 2, 2, 3, 1, 1,
       0, 1, 3, 1, 0, 3, 2, 2, 3, 1, 2, 2, 0, 3, 2, 2, 1, 0, 0, 2, 2, 3,
       3, 3, 0, 3, 3, 1, 0, 2, 0, 1, 0, 1, 3, 1, 3, 2, 3, 1, 1, 0, 0, 3,
       2, 2, 3, 2, 0, 0, 1, 3, 0, 1, 0, 1, 3, 1, 1, 0, 0, 1, 2, 3, 3, 3,
       1, 1, 0, 3, 3, 3, 2, 1, 0, 1, 2, 3, 3, 3, 2, 3, 2, 2, 3, 0, 3, 0,
       2, 2, 2, 2, 1, 3, 0, 0, 1, 0, 3, 2, 0, 3, 1, 3, 1, 2, 2, 3, 2, 2,
       0, 1, 0, 0, 3, 1, 3, 2, 0, 0, 2, 2, 1, 1, 2, 1, 3, 1, 2, 2, 2, 1,
       0, 2, 2, 1, 0, 0, 1, 3, 3, 0, 1, 2, 1, 3, 2, 2, 3, 3, 2, 2, 3, 3,
       1, 1], dtype=int64)

In [17]:
# Accuracy of the scikit-learn forest on the same held-out rows, for comparison with the hand-built ensemble.
get_acc(model.predict(x_test), y_test)

0.82