In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

In [3]:
# Load the raw table from the working directory.
df = pd.read_csv("cwurData.csv")

# Columns fed to the model as predictors, in the order the model will see them.
# NOTE(review): the recorded run attributes ~0.76 of the total feature importance
# to "score"; presumably the rank is derived from it (target leakage) — confirm
# whether it should really be used as a predictor.
features = [
    "quality_of_education",
    "alumni_employment",
    "quality_of_faculty",
    "publications",
    "influence",
    "citations",
    "patents",
    "score",
]

In [4]:
# Split the loaded frame into the predictor matrix and the regression target.
X = df[features]
y = df["world_rank"]

# Impute missing values with the column mean. The means are computed over the
# whole data set, i.e. before any train/test split takes place.
X = X.fillna(X.mean())
y = y.fillna(y.mean())

# Drop the columns that are not used as features. errors="ignore" keeps this
# cell re-runnable: without it, executing the cell a second time raises
# KeyError because the columns were already removed from `df` by the first run.
df = df.drop(
    columns=['institution', 'country', 'national_rank', 'year', 'broad_impact'],
    errors="ignore",
)

In [5]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Random forest with 10,000 trees (n_estimators), seeded for reproducibility and
# allowed to use up to 8 parallel jobs.
model = RandomForestRegressor(
    n_estimators=10000,
    random_state=42,
    n_jobs=8,
)

In [6]:
# Fit on the training split, then predict ranks for the held-out rows.
# fit() returns the fitted estimator itself, so the two calls can be chained.
y_pred = model.fit(X_train, y_train).predict(X_test)

In [7]:
# Tolerance, in rank positions, within which a prediction counts as a hit.
threshold = 10

# Regression metrics on the held-out test split.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
# Fraction of test rows whose predicted rank is within ±threshold of the actual rank.
accuracy = np.mean(np.abs(y_test - y_pred) <= threshold)

# Report, in a fixed order: importances from the fitted forest, then the metrics.
print("Feature Importances:", model.feature_importances_)
print("Mean Squared Error:", mse)
print("R-squared:", r2)
print(f"Accuracy (within ±{threshold} ranks):", accuracy)

# Side-by-side preview of actual vs. predicted rank for the first ten test rows.
output_df = pd.DataFrame(
    {
        "Actual Rank": y_test.values,
        "Predicted Rank": y_pred,
    }
)
print(output_df.head(10))

Feature Importances: [0.0041732  0.00558172 0.02412904 0.16895522 0.01863325 0.01001639
 0.00369626 0.76481492]
Mean Squared Error: 151.82204194263642
R-squared: 0.9983439605519461
Accuracy (within ±10 ranks): 0.865909090909091
   Actual Rank  Predicted Rank
0          252        254.7470
1          135        144.4106
2          562        565.6845
3          536        535.3966
4          377        375.3373
5          911        913.9166
6          395        395.4291
7          331        328.6466
8          452        452.7210
9          620        619.1286


In [8]:
# Single-row frame holding one university's indicator values; the values must be
# listed in the same order as `features`, which supplies the column names.
new_university = pd.DataFrame(
    [[
        355,    # quality_of_education
        423,    # alumni_employment
        210,    # quality_of_faculty
        558,    # publications
        558,    # influence
        363,    # citations
        737,    # patents
        44.77,  # score
    ]],
    columns=features,
)

# predict() returns one value per input row; only the first (and only) one is printed.
predicted_rank = model.predict(new_university)
print("Predicted World Rank for University of Salamanca:", predicted_rank[0])

Predicted World Rank for University of Salamanca: 619.1286
