# 携帯電話価格帯分類のコンペの前情報

## 前処理

In [1]:
# Import the required libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
# Paths of the training and test CSV files, relative to this notebook.
train_filename, test_filename = "../data/train.csv", "../data/test.csv"

# Read both CSV files into DataFrames, in the same order as the paths above.
train_df, test_df = (
    pd.read_csv(csv_path) for csv_path in (train_filename, test_filename)
)

## パラメータを調整


In [29]:
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import xgboost as xgb

# Train an XGBoost multi-class classifier on price_range and score it on a
# 20% hold-out split taken from the training data.

# Separate the features from the target; the 'id' column is excluded from
# the feature matrices of both the training and the test frame.
X_train = train_df.drop(['price_range', 'id'], axis=1)
y_train = train_df['price_range']
X_test = test_df.drop(['id'], axis=1)

# Split into training and validation parts BEFORE scaling. Fitting the
# scaler on the full training set would let validation rows influence the
# mean/variance used for preprocessing (data leakage).
# NOTE: y_train is rebound to the training part of the split, as before.
X_fit, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=4
)

# Standardize: statistics come from the training part only and are then
# applied unchanged to the validation and test features.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_fit)
X_val_scaled = scaler.transform(X_val)
X_test_scaled = scaler.transform(X_test)

# XGBoost hyperparameters.
params = {
    'learning_rate': 0.1,
    'n_estimators': 200,
    'max_depth': 4,
    'min_child_weight': 1,
    'gamma': 0,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    'reg_alpha': 0.1,
    'reg_lambda': 1,
    'objective': 'multi:softmax',  # multi-class classification
    'num_class': 4,  # number of classes
}

# NOTE(review): use_label_encoder is reported as deprecated by recent
# xgboost releases - confirm against the installed version before removing.
xgb_model = xgb.XGBClassifier(**params, use_label_encoder=False, eval_metric='mlogloss')

# Fit on the training part of the split only.
xgb_model.fit(X_train_scaled, y_train)

# Evaluate on the held-out validation part: accuracy and macro-averaged F1.
y_pred = xgb_model.predict(X_val_scaled)
accuracy = accuracy_score(y_val, y_pred)
print(f'Accuracy: {accuracy}')
f1_macro = f1_score(y_val, y_pred, average='macro')
print(f'f1_macro: {f1_macro:.4f}')



Accuracy: 0.5041666666666667
f1_macro: 0.4831


In [31]:
# Predict a price_range class for every row of the scaled test features.
test_predict = xgb_model.predict(X_test_scaled)
# Bare expression as the last statement so the notebook displays the array.
test_predict

array([1, 0, 3, 1, 2, 3, 0, 3, 3, 1, 2, 2, 3, 1, 3, 1, 1, 0, 0, 1, 2, 1,
       2, 3, 1, 1, 1, 0, 3, 2, 0, 1, 1, 2, 1, 2, 0, 0, 0, 0, 0, 3, 1, 0,
       2, 2, 3, 0, 2, 2, 2, 2, 3, 1, 1, 1, 2, 1, 3, 2, 2, 0, 0, 1, 2, 2,
       2, 2, 0, 2, 0, 1, 2, 2, 0, 1, 0, 0, 2, 1, 0, 1, 0, 2, 2, 3, 3, 0,
       0, 0, 0, 3, 1, 0, 3, 2, 2, 3, 2, 2, 0, 2, 0, 2, 2, 0, 3, 2, 2, 1,
       2, 0, 2, 1, 1, 1, 3, 1, 2, 1, 2, 3, 1, 1, 0, 2, 2, 2, 0, 2, 1, 0,
       1, 2, 3, 1, 0, 2, 1, 3, 2, 1, 2, 3, 2, 1, 1, 1, 2, 0, 2, 3, 2, 2,
       2, 0, 2, 2, 1, 3, 2, 0, 1, 3, 2, 3, 0, 1, 1, 1, 2, 3, 1, 3, 2, 3,
       2, 1, 2, 1, 0, 2, 0, 0, 2, 2, 1, 3, 2, 2, 0, 1, 2, 1, 3, 0, 1, 1,
       1, 0, 3, 2, 0, 2, 2, 3, 2, 1, 1, 0, 1, 1, 1, 0, 1, 2, 2, 2, 3, 1,
       2, 0, 1, 1, 1, 1, 1, 2, 1, 1, 3, 1, 2, 0, 2, 1, 2, 2, 2, 0, 2, 0,
       0, 0, 0, 0, 0, 2, 1, 2, 2, 3, 2, 2, 3, 2, 0, 1, 3, 0, 0, 2, 3, 2,
       1, 3, 2, 1, 1, 2, 1, 2, 1, 2, 0, 0, 1, 1, 2, 1, 1, 2, 1, 3, 2, 2,
       2, 0, 0, 3, 1, 2, 1, 3, 1, 2, 2, 1, 2, 0, 0,

In [32]:
# Build the submission file: copy sample_submission row by row, replacing
# the second column of each row with the corresponding test prediction.
import csv
import os

# Path of the sample_submission file used as the row template.
sample_filename = "../data/sample_submission.csv"
# Path of the submission file to write.
submit_filename = "../submit/submit7.csv"

# Read every template row up front so the row count can be validated.
# newline='' lets the csv module handle line endings itself.
with open(sample_filename, 'r', newline='', encoding='utf-8') as file:
    sample_rows = list(csv.reader(file))

# Fail early with a clear message instead of an IndexError part-way through
# (more template rows than predictions) or a silently truncated submission
# (fewer template rows than predictions).
if len(sample_rows) != len(test_predict):
    raise ValueError(
        f"{sample_filename} has {len(sample_rows)} rows but there are "
        f"{len(test_predict)} predictions; the row counts must match"
    )

submit_data = []
for row, label in zip(sample_rows, test_predict):
    # Store a plain Python int so the written value does not depend on how
    # the array's scalar type is converted to text.
    row[1] = int(label)
    submit_data.append(row)

# Make sure the output directory exists before opening the file for writing.
submit_dir = os.path.dirname(submit_filename)
if submit_dir:
    os.makedirs(submit_dir, exist_ok=True)

# Write the rewritten rows to the submission CSV.
with open(submit_filename, 'w', newline='', encoding='utf-8') as file:
    csv.writer(file).writerows(submit_data)
