# Advanced Certification in AIML
## A Program by IIIT-H and TalentSprint

## Learning Objective

At the end of the experiment, you will be able to:

* Classify fruits data using KNN classifier
* Visualize the predictions before and after scaling

## Dataset

The dataset chosen for this experiment is a handmade fruits dataset containing 60 records. Each record holds the following details of a fruit:

* Weight - The mass of an object. In this dataset, the weights are measured in grams.

* Sphericity - A measure of how closely the shape of an object approaches that of a mathematically perfect sphere.

* Color - Every fruit has a different color at different stages. The color is encoded as an integer value, for example:

     - Green as 20
     - Greenish Yellow as 40
     - Orange as 60
     - Red as 80
     - Reddish Yellow as 100

*  Label -   We have considered two fruits for simplicity. They are Apple and Orange.




In [None]:
# Notebook shell command: download the fruits CSV into the current working directory
!wget https://cdn.talentsprint.com/aiml/Experiment_related_data/fruits_weight_sphercity.csv

#### Importing the required packages

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Load the downloaded CSV into a DataFrame and preview its first rows
fruits_data = pd.read_csv('fruits_weight_sphercity.csv')
fruits_data.head()

In [None]:
# Encode the string-valued Color and labels columns as integers.
# Values not listed in a mapping are left unchanged by `replace`.
color_codes = {
    'Green': 20,
    'Greenish yellow': 40,
    'Orange': 60,
    'Red': 80,
    'Reddish yellow': 100,
}
label_codes = {'apple': 1, 'orange': 0}

fruits_data['Color'] = fruits_data['Color'].replace(color_codes)
fruits_data['labels'] = fruits_data['labels'].replace(label_codes)

In [None]:
# (rows, columns) of the full dataset
fruits_data.shape

### Taking every third sample as training data

In [None]:
# Train set: every third row by position (0, 3, ..., 57), i.e. 20 samples
train = fruits_data.iloc[0:60:3]
train

#### Checking for length of the dataset

In [None]:
# Number of records in the full dataset
print(len(fruits_data))
# Number of records selected for training
print(len(train))
# Container type of the training split
print(type(train))

In [None]:
# Test set: every tenth row by position starting at 1 (1, 11, 21, 31, 41), i.e. 5 samples
# NOTE(review): position 21 is also part of the train slice (0:60:3), so one test
# sample has been seen during training -- consider a disjoint selection.
test = fruits_data.iloc[1:50:10]
test

In [None]:
# Number of records selected for testing
print(len(test))

In [None]:
# Feature columns for training and testing: the columns at positions 1 and 2.
# Presumably Weight and Sphericity (in that order), since the plotting code reads
# stacked column 0 as Weight and column 1 as Sphericity -- verify against the CSV.
feature_columns = train.columns[1:3]
traindata = train[feature_columns]
testdata = test[feature_columns]

In [None]:
# Preview the training features
traindata.head()

In [None]:
# Preview the test features (the training features are previewed in the cell above;
# this cell previously repeated that same preview)
testdata.head()

In [None]:
# (rows, columns) of the train and test feature frames
traindata.shape, testdata.shape

### Applying KNN Classifier on the data

In [None]:
from sklearn.neighbors import KNeighborsClassifier
# Number of neighbours that vote on each prediction
k = 3
# Unfitted KNN classifier; it is trained in the next cell
neigh = KNeighborsClassifier(n_neighbors=k)

In [None]:
# Fit the classifier on the training features against the training labels
neigh.fit(traindata, train['labels'])

# Predict a class for every test sample
predictions = neigh.predict(testdata)

In [None]:
# Show predicted classes next to the true test labels for comparison
print(predictions, "predictions")
print(test.labels.values, "Actual_labels")

In [None]:
# Append the predicted class as a third column next to the two test features
# (used below for plotting the predictions)
test_features = testdata.to_numpy()
predicted_data = np.column_stack([test_features, predictions])
print(predicted_data)

### Plotting the train, test and predictions before scaling

In [None]:
# Two side-by-side panels: true classes on the left, predicted classes on the right
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))

# Split train and test rows by true class (labels column: 0 = orange, 1 = apple)
oranges_train, apples_train = train[train.labels == 0], train[train.labels == 1]
oranges_test, apples_test = test[test.labels == 0], test[test.labels == 1]

# Split the stacked predictions by predicted class (column 2)
predicted_class = predicted_data[:, 2]
oranges_pred = predicted_data[predicted_class == 0]
apples_pred = predicted_data[predicted_class == 1]

# Training points are drawn as circles on both panels
for ax in (ax1, ax2):
    ax.scatter(oranges_train.Sphericity, oranges_train.Weight,
               color="green", marker="o", label="Oranges-Train")
    ax.scatter(apples_train.Sphericity, apples_train.Weight,
               color="red", marker="o", label="Apples-Train")

# Left panel: test points as large stars, coloured by their true class
ax1.scatter(oranges_test.Sphericity, oranges_test.Weight,
            color="green", marker="*", s=200, label="Oranges-Test")
ax1.scatter(apples_test.Sphericity, apples_test.Weight,
            color="red", marker="*", s=200, label="Apples-Test")

# Right panel: the same test points, coloured by their predicted class
# (stacked column 1 goes on the x-axis, column 0 on the y-axis)
ax2.scatter(oranges_pred[:, 1], oranges_pred[:, 0],
            color="green", marker="*", s=200, label="Oranges-Predictions")
ax2.scatter(apples_pred[:, 1], apples_pred[:, 0],
            color="red", marker="*", s=200, label="Apples-Predictions")

# Shared decoration for both panels
for ax, title in ((ax1, "Train and Test data points"),
                  (ax2, "Predictions before scaling")):
    ax.legend()
    ax.set_xlabel('Sphericity')
    ax.set_ylabel('Weight')
    ax.set_title(title)
plt.show()

## Scaling the data

In [None]:
from sklearn.preprocessing import MinMaxScaler
# Unfitted min-max scaler; it is fitted when the features are scaled below
scaler = MinMaxScaler()

In [None]:
# Preview the data before scaling, for comparison with the scaled preview below
fruits_data.head()

In [None]:
# Min-max scale the two numeric features.
# The scaler is fitted on the training rows only (the rows selected into `train`
# earlier), so the min/max statistics do not leak information from the rows later
# used for testing. The fitted transform is then applied to every row, so values
# outside the training range may fall slightly outside [0, 1].
scaled_columns = ['Sphericity', 'Weight']
scaler.fit(fruits_data.loc[train.index, scaled_columns])
fruits_data[scaled_columns] = scaler.transform(fruits_data[scaled_columns])

In [None]:
# Preview the data after scaling the Sphericity and Weight columns
fruits_data.head()

### Taking every third sample as training data

In [None]:
# Re-create the train set from the scaled data: every third row by position (0, 3, ..., 57)
train = fruits_data.iloc[0:60:3]
train.head()

In [None]:
# Re-create the test set from the scaled data: positions 1, 11, 21, 31, 41
# NOTE(review): position 21 is also part of the train slice (0:60:3), so one test
# sample has been seen during training -- consider a disjoint selection.
test = fruits_data.iloc[1:50:10]
test

In [None]:
# Number of records in the scaled test set
print(len(test))

#### Apply KNN Classifier on the scaled data

In [None]:
from sklearn.neighbors import KNeighborsClassifier
# Same neighbour count as in the unscaled experiment
k = 3
# Fresh, unfitted classifier for the scaled data
neigh = KNeighborsClassifier(n_neighbors=k)

In [None]:
# Select the feature columns (positions 1 and 2) from the scaled splits.
# The results are still DataFrames; no array conversion happens here.
feature_columns = train.columns[1:3]
traindata = train[feature_columns]
testdata = test[feature_columns]

In [None]:
# Fit the classifier on the scaled training features against the training labels
neigh.fit(traindata, train['labels'])

# Predict a class for every scaled test sample
scaled_predictions = neigh.predict(testdata)

In [None]:
# Show predictions on the scaled data next to the true test labels
print(scaled_predictions,"predictions") 
print(test.labels.values,"labels")

In [None]:
# Append the predicted class as a third column next to the two scaled test features
scaled_test_features = testdata.to_numpy()
predicted_data = np.column_stack([scaled_test_features, scaled_predictions])
print(predicted_data)

### Plotting the train and test points after scaling

In [None]:
# Two side-by-side panels: true classes on the left, predicted classes on the right
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(12, 5))

# Split the scaled train and test rows by true class (labels column: 0 = orange, 1 = apple)
oranges1_train, apples1_train = train[train.labels == 0], train[train.labels == 1]
oranges1_test, apples1_test = test[test.labels == 0], test[test.labels == 1]

# Split the stacked predictions by predicted class (column 2)
predicted_class = predicted_data[:, 2]
oranges1_pred = predicted_data[predicted_class == 0]
apples1_pred = predicted_data[predicted_class == 1]

# Training points are drawn as circles on both panels
for ax in (ax1, ax2):
    ax.scatter(oranges1_train.Sphericity, oranges1_train.Weight,
               color="green", marker="o", label="Oranges-Train")
    ax.scatter(apples1_train.Sphericity, apples1_train.Weight,
               color="red", marker="o", label="Apples-Train")

# Left panel: test points as large stars, coloured by their true class
ax1.scatter(oranges1_test.Sphericity, oranges1_test.Weight,
            color="green", marker="*", s=200, label="Oranges-Test")
ax1.scatter(apples1_test.Sphericity, apples1_test.Weight,
            color="red", marker="*", s=200, label="Apples-Test")

# Right panel: the same test points, coloured by their predicted class
# (stacked column 1 goes on the x-axis, column 0 on the y-axis)
ax2.scatter(oranges1_pred[:, 1], oranges1_pred[:, 0],
            color="green", marker="*", s=200, label="Oranges-Predictions")
ax2.scatter(apples1_pred[:, 1], apples1_pred[:, 0],
            color="red", marker="*", s=200, label="Apples-Predictions")

# Shared decoration for both panels
for ax, title in ((ax1, "Train and Test data points"),
                  (ax2, "Predictions after scaling")):
    ax.legend()
    ax.set_xlabel('Sphericity')
    ax.set_ylabel('Weight')
    ax.set_title(title)
plt.show()