In [None]:
import os

import numpy as np
import pandas as pd
from google.colab import drive

# Mount Google Drive so the project folder is reachable under /content/drive.
drive.mount('/content/drive')

# Project layout on Drive: features are read from FEATURE_DIR, and every
# artifact this notebook produces is written under RESULTS_DIR.
BASE_DIR = '/content/drive/MyDrive/sichuan_data'
FEATURE_DIR = f'{BASE_DIR}/features'
RESULTS_DIR = f'{BASE_DIR}/results'

# Create the results folder up front; a no-op when it already exists.
os.makedirs(RESULTS_DIR, exist_ok=True)

# Echo both resolved paths, one per line.
print(FEATURE_DIR, RESULTS_DIR, sep='\n')

In [None]:
# Locations of the metadata table and the feature matrix inside FEATURE_DIR.
meta_path = f'{FEATURE_DIR}/sichuan_features_metadata.csv'
features_path = f'{FEATURE_DIR}/sichuan_features.npy'

# Read the metadata CSV and the saved NumPy feature array.
meta_df = pd.read_csv(meta_path)
features = np.load(features_path)

# Quick sanity check: array dimensions, then the first metadata rows.
print('features shape:', features.shape)
meta_df.head()

In [None]:
# Build the regression inputs: keep only the samples that have a GDP label
# and take the matching rows of the feature matrix.
#
# `features` is indexed by row position below, so row i of `meta_df` must
# describe row i of `features`. Fail loudly when the two files are out of
# sync instead of silently pairing features with the wrong GDP values.
if len(meta_df) != features.shape[0]:
    raise ValueError(
        f'metadata has {len(meta_df)} rows but features has '
        f'{features.shape[0]} rows; the two must be row-aligned'
    )

# Positional boolean mask of labelled rows. Selecting by position (rather
# than by DataFrame index labels) stays correct even if `meta_df` ever
# carries a non-default index.
has_gdp = meta_df['gdp'].notna().to_numpy()

df = meta_df.loc[has_gdp].copy()
y = df['gdp'].values.astype('float32')
X = features[has_gdp]

# Display the resulting shapes; the first dimensions should be equal.
X.shape, y.shape

In [None]:
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for evaluation; the fixed seed keeps the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Ridge regression on the features (`fit` returns the fitted estimator).
model = Ridge(alpha=1.0).fit(X_train, y_train)
y_pred = model.predict(X_test)

# Score the held-out predictions against the true GDP values.
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print('R^2:', r2)
print('MAE:', mae)

In [None]:
import json

# Collect the evaluation summary as plain Python numbers so it serializes
# cleanly to JSON.
metrics = dict(
    r2=float(r2),
    mae=float(mae),
    n_train=int(len(X_train)),
    n_test=int(len(X_test)),
)

# Persist the summary next to the other results.
metrics_path = os.path.join(RESULTS_DIR, 'metrics_cnn_features.json')
with open(metrics_path, 'w') as metrics_file:
    json.dump(metrics, metrics_file, indent=2)

# Show where the file was written.
metrics_path

In [None]:
import matplotlib.pyplot as plt

# Scatter of held-out true values against model predictions, drawn through
# the explicit figure/axes interface.
fig, ax = plt.subplots(figsize=(5, 5))
ax.scatter(y_test, y_pred, alpha=0.6)
ax.set_xlabel('True GDP')
ax.set_ylabel('Predicted GDP')
ax.set_title('True vs predicted GDP (CNN features)')
ax.grid(True)

# Save the figure alongside the metrics file and show the output path.
plot_path = os.path.join(RESULTS_DIR, 'scatter_true_vs_pred_cnn.png')
fig.savefig(plot_path, dpi=150, bbox_inches='tight')
plot_path