In [43]:
import pandas as pd
import numpy as np 
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split

### Reading and Preprocessing data

In [44]:
# Reading census dataset 
df=pd.read_csv("census.csv")

# Removing unwanted coloumns from our training
df=df.drop(["race","sex","education_level"],axis=1)

In [45]:
# Replacing each string value in dataset with a corrosponding integer value
def preprocess(df):
    column = ["workclass","education-num","marital-status","occupation","relationship","native-country","income",]
    for col in column:
        Unique_values = list(df[col].unique())
        dic={}
        for x in range(len(Unique_values)):
            dic.update({Unique_values[x]:x})

        df[col]=df[col].map(dic).fillna(df[col])


preprocess(df)

In [46]:
# Spliting input data and True outcome
X=df.drop(["income"],axis=1)
Y=df["income"]

# Getting training and Validation Splits 
X_train,X_val,Y_train,Y_val=train_test_split(X,Y,test_size=0.2,shuffle=True)

### Models

#### SVM

In [47]:
# Creating SVM model and fitting it
model = svm.SVC()
model.fit(X_train,Y_train)

In [48]:
# Checking model Accuracy 
y_pred= model.predict(X_val)
accuracy_score(Y_val,y_pred)

0.8018794914317302

#### MLP: MultiLayered Perceptron

In [67]:
model = MLPClassifier(solver='lbfgs', alpha=1e-5, activation="tanh",early_stopping= True,
                      hidden_layer_sizes=(16, 4),verbose=True,max_iter=10000)

model.fit(X_train,Y_train)

In [68]:
y_pred=model.predict(X_val)
accuracy_score(Y_val,y_pred)

0.812824765063571

In [69]:
pip install optuna

Collecting optuna
  Downloading optuna-3.6.0-py3-none-any.whl (379 kB)
Collecting alembic>=1.5.0
  Downloading alembic-1.13.1-py3-none-any.whl (233 kB)
Collecting colorlog
  Downloading colorlog-6.8.2-py3-none-any.whl (11 kB)
Collecting typing-extensions>=4
  Downloading typing_extensions-4.10.0-py3-none-any.whl (33 kB)
Collecting Mako
  Downloading Mako-1.3.2-py3-none-any.whl (78 kB)
Installing collected packages: typing-extensions, Mako, colorlog, alembic, optuna
  Attempting uninstall: typing-extensions
    Found existing installation: typing-extensions 3.10.0.2
    Uninstalling typing-extensions-3.10.0.2:
      Successfully uninstalled typing-extensions-3.10.0.2
Successfully installed Mako-1.3.2 alembic-1.13.1 colorlog-6.8.2 optuna-3.6.0 typing-extensions-4.10.0
Note: you may need to restart the kernel to use updated packages.


ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow 2.9.1 requires flatbuffers<2,>=1.12, but you have flatbuffers 23.5.26 which is incompatible.
tensorflow 2.9.1 requires protobuf<3.20,>=3.9.2, but you have protobuf 3.20.3 which is incompatible.


In [75]:
import optuna
import sklearn
import sklearn.datasets
import sklearn.neural_network

def objective(trial):

    # ネットワーク構造の決定
    n_layers = trial.suggest_int('n_layers', 1, 4)
    layers = []
    for i in range(n_layers):
        layers.append(trial.suggest_int(f'n_units_{i}', 8, 100))



    # モデルの学習
    clf = sklearn.neural_network.MLPClassifier(hidden_layer_sizes=tuple(layers))
    clf.fit(X_train, Y_train)

    # 学習したモデルの評価
    return clf.score(X_train, Y_val)

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=100)

[I 2024-03-24 00:37:19,487] A new study created in memory with name: no-name-a6ce43f7-997b-4ef1-9557-a814615f73e0
