# Restricted Boltzmann Machine 

## RBM is a 2 layered artificial neural network with generative capabilities. They can learn a probability distribution over its set of input. 

In [60]:
import opendatasets as od
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split
from sklearn.neural_network import BernoulliRBM
from sklearn import linear_model, metrics
from sklearn.pipeline import Pipeline
from sklearn.base import clone
import matplotlib.pyplot as plt

In [9]:
od.download("https://www.kaggle.com/lepchenkov/usedcarscatalog")

Downloading usedcarscatalog.zip to .\usedcarscatalog


100%|████████████████████████████████████████████████████████████████████████████████| 824k/824k [00:02<00:00, 410kB/s]







In [10]:
csv_file = "./usedcarscatalog/cars.csv"
df_from_csv = pd.read_csv(csv_file)
df_from_csv.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 38531 entries, 0 to 38530
Data columns (total 30 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   manufacturer_name  38531 non-null  object 
 1   model_name         38531 non-null  object 
 2   transmission       38531 non-null  object 
 3   color              38531 non-null  object 
 4   odometer_value     38531 non-null  int64  
 5   year_produced      38531 non-null  int64  
 6   engine_fuel        38531 non-null  object 
 7   engine_has_gas     38531 non-null  bool   
 8   engine_type        38531 non-null  object 
 9   engine_capacity    38521 non-null  float64
 10  body_type          38531 non-null  object 
 11  has_warranty       38531 non-null  bool   
 12  state              38531 non-null  object 
 13  drivetrain         38531 non-null  object 
 14  price_usd          38531 non-null  float64
 15  is_exchangeable    38531 non-null  bool   
 16  location_region    385

In [12]:
df = df_from_csv
df.head()

Unnamed: 0,manufacturer_name,model_name,transmission,color,odometer_value,year_produced,engine_fuel,engine_has_gas,engine_type,engine_capacity,...,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,duration_listed
0,Subaru,Outback,automatic,silver,190000,2010,gasoline,False,gasoline,2.5,...,True,True,True,False,True,False,True,True,True,16
1,Subaru,Outback,automatic,blue,290000,2002,gasoline,False,gasoline,3.0,...,True,False,False,True,True,False,False,False,True,83
2,Subaru,Forester,automatic,red,402000,2001,gasoline,False,gasoline,2.5,...,True,False,False,False,False,False,False,True,True,151
3,Subaru,Impreza,mechanical,blue,10000,1999,gasoline,False,gasoline,3.0,...,False,False,False,False,False,False,False,False,False,86
4,Subaru,Legacy,automatic,black,280000,2001,gasoline,False,gasoline,2.5,...,True,False,True,True,False,False,False,False,True,7


In [27]:
df_x = df[["feature_0","feature_1","feature_2","feature_3","feature_4","feature_5","feature_6","feature_7","feature_8","feature_9"]]
df_x

Unnamed: 0,feature_0,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9
0,False,True,True,True,False,True,False,True,True,True
1,False,True,False,False,True,True,False,False,False,True
2,False,True,False,False,False,False,False,False,True,True
3,True,False,False,False,False,False,False,False,False,False
4,False,True,False,True,True,False,False,False,False,True
...,...,...,...,...,...,...,...,...,...,...
38526,False,True,False,False,True,True,False,False,True,True
38527,False,True,False,False,True,True,False,False,True,True
38528,False,True,False,False,True,True,False,False,True,True
38529,False,True,False,False,False,False,False,False,False,True


In [30]:
df_y = df["manufacturer_name"]
df_y

0          Subaru
1          Subaru
2          Subaru
3          Subaru
4          Subaru
           ...   
38526    Chrysler
38527    Chrysler
38528    Chrysler
38529    Chrysler
38530    Chrysler
Name: manufacturer_name, Length: 38531, dtype: object

In [32]:
oe = OrdinalEncoder()
oe.fit(df_x)
X_enc = oe.transform(df_x)
X_enc.shape

(38531, 10)

In [34]:
df_y = df_y.values.reshape(-1,1)
oe.fit(df_y)
Y_enc = oe.transform(df_y)
Y_decoded = oe.inverse_transform(Y_enc)
Y_decoded.shape

(38531, 1)

In [37]:
X_train, X_test, Y_train, Y_test = train_test_split(X_enc, Y_decoded, test_size = 0.2)

In [50]:
logistic = linear_model.LogisticRegression(solver = "newton-cg", tol = 1, C = 1.0, class_weight = None, dual = False, fit_intercept = True, intercept_scaling = 1, l1_ratio = None, max_iter = 100, multi_class = 'auto', n_jobs = None, penalty = 'l2', random_state = None, warm_start = True)
rbm = BernoulliRBM(random_state = 0, verbose = False, learning_rate = 100, n_iter = 20, n_components = 1000, batch_size = 50)
rbm_features_classifier = Pipeline(steps = [("rbm",rbm),("logistic",logistic)])

In [51]:
rbm_features_classifier.fit(X_train, Y_train)

  y = column_or_1d(y, warn=True)


Pipeline(steps=[('rbm',
                 BernoulliRBM(batch_size=50, learning_rate=100,
                              n_components=1000, n_iter=20, random_state=0,
                              verbose=False)),
                ('logistic',
                 LogisticRegression(solver='newton-cg', tol=1,
                                    warm_start=True))])

In [54]:
raw_pixel_classifier = clone(logistic)
raw_pixel_classifier.C = 700
raw_pixel_classifier.fit(X_train, Y_train)

  y = column_or_1d(y, warn=True)


LogisticRegression(C=700, solver='newton-cg', tol=1, warm_start=True)

In [57]:
Y_pred = rbm_features_classifier.predict(X_test)
print(
    "Logistic regression using RBM features:\n%s\n"
    % (metrics.classification_report(Y_test, Y_pred))
)

Logistic regression using RBM features:
               precision    recall  f1-score   support

        Acura       0.00      0.00      0.00        10
   Alfa Romeo       0.00      0.00      0.00        37
         Audi       0.00      0.00      0.00       480
          BMW       0.00      0.00      0.00       477
        Buick       0.00      0.00      0.00         9
     Cadillac       0.00      0.00      0.00        11
        Chery       0.00      0.00      0.00        11
    Chevrolet       0.00      0.00      0.00        87
     Chrysler       0.00      0.00      0.00        82
      Citroen       0.00      0.00      0.00       310
        Dacia       0.00      0.00      0.00        11
       Daewoo       0.00      0.00      0.00        34
        Dodge       0.00      0.00      0.00        58
         Fiat       0.00      0.00      0.00       158
         Ford       0.00      0.00      0.00       562
        Geely       0.00      0.00      0.00         9
   Great Wall       0.00

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [58]:
Y_pred = raw_pixel_classifier.predict(X_test)
print(
    "Logistic regression using raw pixel features:\n%s\n"
    % (metrics.classification_report(Y_test, Y_pred))
)

Logistic regression using raw pixel features:
               precision    recall  f1-score   support

        Acura       0.00      0.00      0.00        10
   Alfa Romeo       0.00      0.00      0.00        37
         Audi       0.11      0.05      0.07       480
          BMW       0.18      0.60      0.28       477
        Buick       0.00      0.00      0.00         9
     Cadillac       0.00      0.00      0.00        11
        Chery       0.00      0.00      0.00        11
    Chevrolet       0.00      0.00      0.00        87
     Chrysler       0.00      0.00      0.00        82
      Citroen       0.10      0.03      0.05       310
        Dacia       0.00      0.00      0.00        11
       Daewoo       0.00      0.00      0.00        34
        Dodge       0.00      0.00      0.00        58
         Fiat       0.00      0.00      0.00       158
         Ford       0.08      0.01      0.02       562
        Geely       0.00      0.00      0.00         9
   Great Wall     

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
