In [None]:
import pandas as pd
import numpy as np
from lightgbm import LGBMRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

# 데이터 불러오기 및 변환 (생략 가능)
# Update the file path to the correct location or ensure the file exists
df1 = pd.read_csv(r"D:\back_office-main\hoyeon\현대3.csv")
# Verify if '차종' exists in the DataFrame
if '차종' not in df1.columns:
    print("⚠️ Column '차종' not found in the DataFrame. Removing it from id_vars.")
    id_vars = ['국가명', '연도', '기후대', 'GDP', '차량 구분']  # Remove '차종' from id_vars
else:
    id_vars = ['국가명', '연도', '기후대', 'GDP', '차종', '차량 구분']

month_cols = [f"{i}월" for i in range(1, 13)]
df_long = pd.melt(df1, id_vars=id_vars, value_vars=month_cols, 
                  var_name='월', value_name='수출량')
df_long['월'] = df_long['월'].str.replace('월', '').astype(int)
df_long['날짜'] = pd.to_datetime(df_long['연도'].astype(str) + '-' + df_long['월'].astype(str) + '-01')
df_long = df_long.sort_values(by=['국가명', '날짜'])

# 시차/타겟 생성
df_long['전월_수출량'] = df_long.groupby('국가명')['수출량'].shift(1)
df_long['다음달_수출량'] = df_long.groupby('국가명')['수출량'].shift(-1)
df_model = df_long.dropna(subset=['전월_수출량', '다음달_수출량']).copy()

# 특성 정의
features = ['수출량', '전월_수출량', '연도', '월', 'GDP', '국가명', '기후대', '차종', '차량 구분']
target = '다음달_수출량'
categorical_features = ['국가명', '기후대', '차종', '차량 구분']

# 결과 저장용
results = []

# 전체 날짜 목록
unique_dates = sorted(df_model['날짜'].unique())

# 최소 12개월 학습 후 시작
for i in range(12, len(unique_dates) - 1):
    train_end = unique_dates[i]
    test_month = unique_dates[i + 1]
    
    # 학습 데이터: 현재 시점까지
    train_data = df_model[df_model['날짜'] <= train_end]
    test_data = df_model[df_model['날짜'] == test_month]
    
    # 특성 & 타겟
    X_train = train_data[features]
    y_train = train_data[target]
    X_test = test_data[features]
    y_test = test_data[target]

    # 인코딩
    X_train = pd.get_dummies(X_train, columns=categorical_features)
    X_test = pd.get_dummies(X_test, columns=categorical_features)

    # 열 정렬 맞추기
    X_test = X_test.reindex(columns=X_train.columns, fill_value=0)

    # 스케일링
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # 모델 학습 및 예측
    model = LGBMRegressor()
    model.fit(X_train_scaled, y_train)
    y_pred = model.predict(X_test_scaled)

    # 성능 측정
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # 결과 저장
    results.append({
        '기준월': test_month,
        'MSE': mse,
        'R2': r2
    })

# 결과 DataFrame
results_df = pd.DataFrame(results)
print(results_df)

# 평균 성능
print("\n✅ 전체 평균 성능")
print("평균 MSE:", results_df['MSE'].mean())
print("평균 R²:", results_df['R2'].mean())


KeyError: "The following id_vars or value_vars are not present in the DataFrame: ['차종']"

In [47]:
import joblib

# 모델 저장
joblib.dump(model, "h_lgbm_model.pkl")

# 스케일러 저장
joblib.dump(scaler, "h_scaler.pkl")
# 모델 저장
joblib.dump(X_train.columns.tolist(), "h_model_columns.pkl")




['h_model_columns.pkl']

In [2]:
import pandas as pd

df1 = pd.read_csv(r"D:\back_office-main\hoyeon\현대_기아.csv")

In [3]:
df1["차량 브랜드"].unique()

array(['Hyundai'], dtype=object)

In [41]:
# Filter out rows where the "차량 브랜드" column is 'kia'
df1 = df1[df1["차량 브랜드"] != 'Kia']

In [5]:
df1 = df1.drop(columns=["차량 브랜드"])

In [44]:
df1['기후대'].unique()

array(['온대', '한랭', '열대', '건조'], dtype=object)

In [7]:
import os

# Ensure the directory exists
output_dir = r"D:\back_office-main\hoyeon"
os.makedirs(output_dir, exist_ok=True)

# Save the file
df1.to_csv(os.path.join(output_dir, "현대_기아.csv"))

In [19]:
df1['국가명'].unique()

array(['US', 'Canada', 'Mexico', 'EU+EFTA', 'E.Europe/CIS',
       'Latin America', 'Middle East/Africa', 'Asia / Pacific', 'China',
       'India'], dtype=object)

In [18]:
# Load the dataframe
df = pd.read_csv(r"D:\back_office-main\hoyeon\기아.csv")

# Get unique values of the '차종' column
df['국가명'].unique()

array(['US', 'Canada', 'Mexico', 'EU+EFTA', 'E.Europe/CIS',
       'Latin America', 'Middle East/Africa', 'Asia / Pacific', 'China',
       'India'], dtype=object)

In [34]:
reverse_map = {
    'US': 'US',
    'Canada': 'Canada',
    'Mexico': 'Mexico',
    'EU+EFTA': 'EU+EFTA',
    'E.Europe/CIS': 'E.Europe/CIS',
    'Latin America': 'Latin America',
    'Middle East/Africa': 'Middle East/Africa',
    'Asia / Pacific': 'Asia / Pacific',
    'China': 'Asia / Pacific',
    'India': 'Asia / Pacific'
}

# 매핑 적용
df1['국가명'] = df['국가명'].map(reverse_map)

In [39]:
df1.to_csv(r"D:\back_office-main\hoyeon\현대.csv")

In [None]:
df1['기후대'].unique()

Traceback (most recent call last):
  File "c:\Users\vhzkf\.vscode\extensions\ms-python.python-2025.2.0-win32-x64\python_files\python_server.py", line 133, in exec_user_input
    retval = callable_(user_input, user_globals)
  File "<string>", line 1, in <module>
NameError: name 'df1' is not defined



Traceback (most recent call last):
  File "c:\Users\vhzkf\.vscode\extensions\ms-python.python-2025.2.0-win32-x64\python_files\python_server.py", line 133, in exec_user_input
    retval = callable_(user_input, user_globals)
  File "<string>", line 2
    %pip install pandas
    ^
SyntaxError: invalid syntax

