In [None]:
# Keras Sequential model: predicts a genre from the 'sex', 'age', and 'budget' features
# loss ~ 1.5444; accuracy ~ 0.4700
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Load the dataset
df = pd.read_csv('label.csv')

# Categorical input columns. Every column from position 3 onward is used as a
# genre target, which assumes the three features are the first three columns
# of label.csv -- TODO confirm against the file.
feature_cols = ['sex', 'age', 'budget']
genre_cols = df.columns[3:]

# One LabelEncoder per categorical feature. They are kept around so the exact
# same category -> integer mapping can be reused at prediction time.
label_encoders = {col: LabelEncoder() for col in feature_cols}

# Encode 'sex', 'age', and 'budget' columns in place
for col in feature_cols:
    df[col] = label_encoders[col].fit_transform(df[col])

# Features and target
X = df[feature_cols].values
y = df.iloc[:, 3:].values  # The genre columns

# Split BEFORE scaling so that the scaler statistics come from the training
# rows only; fitting the scaler on the full dataset leaks test-set information.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the features: fit on the training split, reuse on the test split
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Define the model: three hidden ReLU layers with dropout after the first two.
# NOTE(review): softmax + categorical_crossentropy assumes each row flags
# exactly one genre (one-hot target) -- TODO confirm. Multi-label targets
# would need a sigmoid output with binary_crossentropy instead.
model = Sequential()
model.add(Dense(128, input_dim=X_train.shape[1], activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(32, activation='relu'))
model.add(Dense(y_train.shape[1], activation='softmax'))

# Compile the model. A learning rate of 0.001 is the Keras default for Adam;
# it is spelled out here so it is easy to tune.
optimizer = Adam(learning_rate=0.001)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Train the model; 20% of the training split is held out for validation
model.fit(X_train, y_train, epochs=100, batch_size=10, validation_split=0.2)

# Evaluate the model on the untouched test split
loss, accuracy = model.evaluate(X_test, y_test)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Example input for prediction
example_input = pd.DataFrame({
    'sex': ['female'],
    'age': ['20s'],
    'budget': ['15000~30000']
})

# Encode the example with the ALREADY-FITTED encoders. Using transform() (not
# fit_transform()) keeps the training-time mapping; refitting on a single row
# would map every value to 0. transform() raises ValueError for a category
# that was not present in the training data.
for col in feature_cols:
    example_input[col] = label_encoders[col].transform(example_input[col])

# Scale with the training statistics. Refitting the scaler on one row would
# turn every feature into 0 and make the prediction independent of the input.
# .values is passed because the scaler was fitted on a plain array.
example_features = scaler.transform(example_input[feature_cols].values)

# Make a prediction: pick the genre column with the highest probability
predicted_genres = model.predict(example_features)
predicted_genre_index = predicted_genres[0].argmax()
predicted_genre = genre_cols[predicted_genre_index]
print(f'Recommended genre: {predicted_genre}')


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [None]:
# Baseline accuracy: classical scikit-learn classifiers evaluated on a single genre column
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

# Load the dataset
df = pd.read_csv('label.csv')

# Categorical input columns; one LabelEncoder per column
feature_cols = ['sex', 'age', 'budget']
label_encoders = {col: LabelEncoder() for col in feature_cols}

# Encode 'sex', 'age', and 'budget' columns in place
for col in feature_cols:
    df[col] = label_encoders[col].fit_transform(df[col])

# Features and target
X = df[feature_cols].values
y = df.iloc[:, 3:].values  # The genre columns

# Split BEFORE scaling so that the scaler statistics come from the training
# rows only; fitting the scaler on the full dataset leaks test-set information.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize the features: fit on the training split, reuse on the test split
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Reduce the problem to a binary task for a single genre column.
# For simplicity, only one genre is compared here ('genre_한식').
# NOTE: this is a per-genre binary accuracy, so it is not directly comparable
# to the multiclass accuracy reported for the Sequential model.
genre_index = df.columns[3:].tolist().index('genre_한식')
y_train_genre = y_train[:, genre_index]
y_test_genre = y_test[:, genre_index]

# Reference point: always predict the most frequent training label. Any
# classifier below is only useful if it beats this number.
majority_label = pd.Series(y_train_genre).mode()[0]
majority_accuracy = (y_test_genre == majority_label).mean()
print(f'Majority Class Accuracy: {majority_accuracy * 100:.2f}%')

# List of models to compare. The tree-based models are seeded so that
# re-running the cell reproduces the same numbers.
models = {
    'Logistic Regression': LogisticRegression(),
    'Decision Tree': DecisionTreeClassifier(random_state=42),
    'Random Forest': RandomForestClassifier(random_state=42),
    'k-NN': KNeighborsClassifier(),
    'Naive Bayes': GaussianNB()
}

# Training and evaluating each model. The loop variables are deliberately not
# called `model` / `accuracy`, so this cell does not overwrite the Keras model
# and its test accuracy from the Sequential-model cell.
for name, clf in models.items():
    clf.fit(X_train, y_train_genre)
    y_pred = clf.predict(X_test)
    genre_accuracy = accuracy_score(y_test_genre, y_pred)
    print(f'{name} Accuracy: {genre_accuracy * 100:.2f}%')

Logistic Regression Accuracy: 71.50%
Decision Tree Accuracy: 75.50%
Random Forest Accuracy: 74.50%
k-NN Accuracy: 73.00%
Naive Bayes Accuracy: 72.00%
