In [1]:
# Import the required libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report, f1_score
import xgboost as xgb

In [2]:
# Locations of the labeled training data and the unlabeled test data.
train_filename = "../data/train.csv"
test_filename = "../data/test.csv"

# Read both CSV files into DataFrames, training file first.
train_df, test_df = (
    pd.read_csv(csv_path) for csv_path in (train_filename, test_filename)
)

In [3]:
from sklearn.model_selection import train_test_split
# Separate the target column from the feature columns.
# The 'id' column is dropped together with the target so it is not used as a feature.
y = train_df['price_range']
X = train_df.drop(columns=['price_range', 'id'])

In [4]:
# Standardize the features.
# NOTE(review): the scaler is fit on every labeled row, including the rows
# that are later held out for validation -- confirm this is intended.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

In [6]:
# Hold out 20% of the labeled rows for evaluation; the fixed random_state
# makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=5,
)

In [7]:
# Build the XGBoost classifier and fit it on the training split.
# NOTE(review): `use_label_encoder` may be ignored or warned about depending
# on the installed xgboost version -- confirm against the environment.
xgb_params = {'use_label_encoder': False, 'eval_metric': 'mlogloss'}
xgb_model = xgb.XGBClassifier(**xgb_params)
xgb_model.fit(X_train, y_train)

# Predict on the held-out split, then report the macro-averaged F1 score
# followed by the per-class precision/recall/F1 table.
y_pred = xgb_model.predict(X_test)
f1_macro = f1_score(y_test, y_pred, average='macro')
print(
    "XGBoost",
    f'F1 Macro Score: {f1_macro:.4f}',
    classification_report(y_test, y_pred),
    sep="\n",
)

XGBoost
F1 Macro Score: 0.4772
              precision    recall  f1-score   support

           0       0.39      0.42      0.41        38
           1       0.41      0.42      0.41        62
           2       0.75      0.59      0.66        92
           3       0.38      0.50      0.43        48

    accuracy                           0.50       240
   macro avg       0.48      0.48      0.48       240
weighted avg       0.53      0.50      0.51       240



In [35]:
# Feature matrix for the unlabeled test file ('id' is not a feature).
# It gets its own name instead of rebinding `X_test`: `X_test` holds the
# held-out validation features that pair with `y_test`, and overwriting it
# would make the evaluation cell score the wrong data when re-run.
test_features = test_df.drop(columns=['id'])
# Apply the scaler that was already fitted on the training features.
X_test_scaled = scaler.transform(test_features)



In [36]:
# Predict a class for every row of the standardized test features.
test_predict = xgb_model.predict(X_test_scaled)
# Bare expression: echoes the prediction array as the cell output.
test_predict

array([3, 0, 3, 0, 2, 3, 0, 3, 3, 1, 2, 2, 3, 1, 3, 1, 1, 2, 0, 2, 2, 1,
       2, 3, 1, 1, 1, 1, 3, 2, 3, 3, 1, 2, 1, 2, 1, 0, 0, 0, 3, 3, 1, 0,
       1, 2, 1, 0, 2, 2, 1, 2, 1, 1, 3, 1, 2, 1, 1, 2, 1, 3, 2, 3, 2, 2,
       2, 2, 0, 3, 0, 3, 2, 1, 0, 1, 0, 1, 2, 2, 1, 1, 0, 2, 2, 0, 3, 1,
       0, 1, 3, 0, 1, 3, 3, 2, 3, 1, 2, 2, 0, 1, 0, 2, 2, 0, 2, 2, 3, 2,
       2, 2, 2, 0, 1, 1, 2, 3, 1, 1, 3, 3, 2, 1, 0, 2, 2, 2, 0, 2, 1, 0,
       1, 2, 1, 3, 0, 2, 1, 3, 2, 2, 2, 3, 2, 2, 3, 1, 2, 0, 2, 3, 2, 2,
       2, 0, 1, 2, 3, 3, 2, 1, 1, 1, 2, 3, 0, 2, 1, 1, 2, 3, 1, 3, 2, 3,
       3, 1, 2, 1, 2, 2, 0, 0, 2, 2, 1, 3, 2, 2, 1, 3, 1, 3, 0, 0, 3, 1,
       1, 1, 3, 2, 1, 2, 2, 3, 2, 1, 1, 0, 3, 1, 3, 2, 1, 2, 2, 2, 3, 3,
       2, 0, 1, 2, 3, 1, 1, 2, 1, 1, 3, 1, 2, 0, 0, 2, 3, 2, 2, 2, 2, 0,
       0, 0, 3, 0, 0, 2, 3, 3, 0, 3, 2, 2, 0, 3, 0, 0, 3, 0, 0, 2, 3, 2,
       2, 0, 2, 1, 1, 2, 1, 2, 1, 2, 3, 0, 3, 1, 2, 1, 1, 2, 1, 3, 2, 1,
       2, 3, 0, 3, 2, 2, 2, 3, 1, 2, 2, 3, 2, 1, 1,

In [37]:
# Build the submission file by filling the sample submission template with
# the model's predictions.
import csv
import os

# Path of the sample_submission template.
sample_filename = "../data/sample_submission.csv"
# Path of the submission file to write.
submit_filename = "../submit/submit2.csv"

# Read every template row up front so the row count can be validated before
# any value is overwritten.
with open(sample_filename, 'r', encoding='utf-8', newline='') as file:
    submit_data = list(csv.reader(file))

# NOTE(review): every row, including the first, receives a prediction, so the
# template is assumed to have no header row -- confirm against the data file.
# Fail loudly on a count mismatch instead of silently dropping predictions
# (too few template rows) or crashing mid-loop (too many template rows).
if len(submit_data) != len(test_predict):
    raise ValueError(
        f"{sample_filename} has {len(submit_data)} rows but there are "
        f"{len(test_predict)} predictions"
    )

# Overwrite the second column of each template row with its prediction;
# rows and predictions are matched by position.
for row, prediction in zip(submit_data, test_predict):
    row[1] = prediction

# Create the output directory if it does not exist yet, then write the
# updated rows to the submission CSV.
os.makedirs(os.path.dirname(submit_filename) or ".", exist_ok=True)
with open(submit_filename, 'w', newline='', encoding='utf-8') as file:
    csv.writer(file).writerows(submit_data)