# **Cargar el dataset**

In [1]:
import pandas as pd
import numpy as np

# Location of the raw CSV, resolved relative to the notebook's working directory.
DATASET_PATH = 'Banana_Ripeness_Dataset.csv'

# read_csv already returns a DataFrame, so there is no need to wrap the result
# in pd.DataFrame() again.
data = pd.read_csv(DATASET_PATH)

# Work on an explicit copy so that edits made to `df` cannot alter the frame
# that was just loaded into `data`.
df = data.copy()

# Preview the first rows as a quick sanity check of the load.
df.head(15)

Unnamed: 0,SampleID,R,G,B,view,class
0,1,128.387045,148.385068,88.125925,front,light green
1,2,91.396778,114.322955,48.277243,front,green
2,3,87.804595,112.774147,50.034383,front,green
3,4,81.901377,103.88664,37.110752,front,light green
4,5,94.311041,120.103604,56.237598,front,green
5,6,76.520608,102.599004,41.981733,front,light green
6,7,93.302772,116.598634,53.715915,front,green
7,8,89.898268,109.027225,52.059142,front,light green
8,9,86.83269,113.458006,49.634117,front,light green
9,10,89.010214,113.309468,52.686793,front,light green


# **Verificación y limpieza de datos**

In [2]:
# Remove the 'view' and 'SampleID' columns; the cells below only read the
# R, G, B and class columns.
# errors='ignore' keeps this cell idempotent: because `df` is rebound to its
# own result, running the cell a second time would otherwise raise a KeyError
# for columns that are already gone.
df = df.drop(columns=['view', 'SampleID'], errors='ignore')
df.head(15)

Unnamed: 0,R,G,B,class
0,128.387045,148.385068,88.125925,light green
1,91.396778,114.322955,48.277243,green
2,87.804595,112.774147,50.034383,green
3,81.901377,103.88664,37.110752,light green
4,94.311041,120.103604,56.237598,green
5,76.520608,102.599004,41.981733,light green
6,93.302772,116.598634,53.715915,green
7,89.898268,109.027225,52.059142,light green
8,86.83269,113.458006,49.634117,light green
9,89.010214,113.309468,52.686793,light green


# **Identificar X y Y**

In [3]:
# Feature matrix: the R, G and B columns, kept as a DataFrame.
x = df[['R', 'G', 'B']]

# Target: the 'class' column, kept as a one-column DataFrame (not a Series).
y = df[['class']]

# **Ingeniería de características**

### **One Hot Encoding**

In [4]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder

# Step 1: map each class string to an integer code.
# LabelEncoder expects a 1-D target, so pass the 'class' Series instead of a
# one-column DataFrame; the 2-D input is what triggers scikit-learn's
# column_or_1d DataConversionWarning.
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(df['class'])

# Step 2: expand the integer codes into one indicator column per class.
# The `sparse` keyword was renamed to `sparse_output` in scikit-learn 1.2 and
# removed in 1.4, so try the new spelling first and fall back to the old one
# on older installations. Either way the encoder returns a dense array.
try:
    onehot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    onehot_encoder = OneHotEncoder(sparse=False)

# OneHotEncoder needs a 2-D input of shape (n_samples, 1).
integer_encoded = integer_encoded.reshape(-1, 1)
onehot_encoded = onehot_encoder.fit_transform(integer_encoded)

# NOTE(review): the column names are hard-coded and must follow the order of
# label_encoder.classes_ (sorted class labels). This assumes the dataset holds
# exactly these three classes - TODO confirm against label_encoder.classes_.
Y = pd.DataFrame(onehot_encoded, columns=["Green", "Light Green", "Yellowish Green"])
Y.head(15)

  y = column_or_1d(y, warn=True)


Unnamed: 0,Green,Light Green,Yellowish Green
0,0.0,1.0,0.0
1,1.0,0.0,0.0
2,1.0,0.0,0.0
3,0.0,1.0,0.0
4,1.0,0.0,0.0
5,0.0,1.0,0.0
6,1.0,0.0,0.0
7,0.0,1.0,0.0
8,0.0,1.0,0.0
9,0.0,1.0,0.0


### **Normalización**

In [5]:
from sklearn.preprocessing import StandardScaler, RobustScaler

# Mean normalisation, (x - mean) / (max - min), assembled from two scalers.

# Stage 1: subtract the per-feature mean without dividing by the standard
# deviation.
scaler_mean = StandardScaler(with_mean=True, with_std=False)

# Stage 2: divide by the per-feature range. With quantile_range=(0, 100) the
# "inter-quantile" range used by RobustScaler spans the minimum to the maximum.
scaler_minmax = RobustScaler(with_centering=False,
                             with_scaling=True,
                             quantile_range=(0, 100))

# NOTE(review): both scalers are fitted on the full dataset, before the
# train/test split performed further down, so statistics of the test rows
# leak into the scaling. Consider fitting on the training rows only.

# Centre the features and keep the result as a DataFrame with the original
# column names. Fitting the second scaler on a named DataFrame and then
# transforming a bare ndarray is what raised the
# "X does not have valid feature names" warning.
x_centered = pd.DataFrame(scaler_mean.fit_transform(x),
                          columns=x.columns,
                          index=x.index)

# Centring shifts every value of a feature by the same amount, so its
# max - min range (and therefore the fitted scale) is the same as for raw x.
X_scaled = scaler_minmax.fit_transform(x_centered)

  "X does not have valid feature names, but"


In [6]:
# Re-wrap the scaled values in a DataFrame that reuses the column labels of x.
X_scaled = pd.DataFrame(data=X_scaled, columns=x.columns)

In [7]:
# Summary statistics of the unscaled features, rounded to one decimal place.
x.describe().round(1)

Unnamed: 0,R,G,B
count,1164.0,1164.0,1164.0
mean,80.9,101.8,45.1
std,17.5,16.6,14.1
min,29.3,50.5,9.0
25%,68.7,90.8,35.0
50%,81.0,102.7,44.2
75%,92.7,113.7,54.6
max,136.6,155.4,98.8


In [8]:
# Summary statistics of the scaled features, rounded to one decimal place,
# for comparison with the unscaled summary.
X_scaled.describe().round(1)

Unnamed: 0,R,G,B
count,1164.0,1164.0,1164.0
mean,-0.0,-0.0,0.0
std,0.2,0.2,0.2
min,-0.5,-0.5,-0.4
25%,-0.1,-0.1,-0.1
50%,0.0,0.0,-0.0
75%,0.1,0.1,0.1
max,0.5,0.5,0.6


# **Training**

In [9]:
from sklearn.model_selection import train_test_split

# Place the scaled features and the one-hot labels side by side so that one
# split keeps each sample's inputs and targets on the same row.
data = pd.concat(objs=[X_scaled, Y], axis='columns')

# Hold out 30 % of the rows for testing; the fixed seed makes the partition
# repeatable between runs.
train, test = train_test_split(data, test_size=0.3, random_state=0)

# Display the (rows, columns) shape of both partitions.
(train.shape, test.shape)

((814, 6), (350, 6))

In [10]:
# Preview the first rows of the training partition (features plus labels).
train.head(n=15)

Unnamed: 0,R,G,B,Green,Light Green,Yellowish Green
618,-0.05539,-0.037974,-0.049566,0.0,1.0,0.0
1138,0.419084,0.307494,0.372882,0.0,0.0,1.0
278,0.016547,-0.010635,0.001937,0.0,1.0,0.0
851,-0.189154,-0.117164,-0.242097,0.0,1.0,0.0
1136,0.016603,0.038131,0.033214,0.0,1.0,0.0
279,-0.065557,-0.023461,-0.035386,1.0,0.0,0.0
366,-0.073297,-0.122789,-0.148046,0.0,0.0,1.0
1101,0.001469,0.015901,0.060004,1.0,0.0,0.0
834,-0.206245,-0.130089,-0.221849,0.0,1.0,0.0
206,0.152766,0.197718,0.203748,0.0,0.0,1.0


# **Testing**

In [11]:
# Preview the first rows of the held-out test partition (features plus labels).
test.head(n=15)

Unnamed: 0,R,G,B,Green,Light Green,Yellowish Green
915,-0.349444,-0.369619,-0.372386,1.0,0.0,0.0
590,-0.240258,-0.249927,-0.214589,0.0,1.0,0.0
108,0.126349,0.136304,0.243719,0.0,1.0,0.0
833,-0.290602,-0.247004,-0.288997,1.0,0.0,0.0
1069,0.100795,0.142942,0.076298,0.0,0.0,1.0
995,0.228751,0.218315,0.120868,0.0,0.0,1.0
52,0.094078,0.099582,0.121412,0.0,0.0,1.0
491,-0.102009,-0.024883,-0.051582,0.0,1.0,0.0
592,0.089796,0.091903,0.050846,0.0,1.0,0.0
895,-0.096348,-0.020355,-0.161256,0.0,1.0,0.0
