In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import RidgeCV
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split

def prepare_data(data_dict):
    """Collapse every indicator table into one per-country average column.

    Parameters
    ----------
    data_dict : dict
        Maps indicator keys (``'political_stability'``, ``'gdp'``, ...) to
        DataFrames. The ``'political_stability'`` frame must carry a
        ``'Country Name'`` column.

    Returns
    -------
    pandas.DataFrame
        ``'Country Name'`` followed by one ``'Average <Indicator>'`` column per
        indicator, each holding the row-wise mean of that frame's numeric
        columns.

    Notes
    -----
    Columns are assigned by index, so rows are combined by index label.
    NOTE(review): this presumably relies on every file listing the same
    countries in the same order -- confirm against the CSVs.
    """
    # (display label, key in data_dict), in output column order.
    labelled_sources = (
        ('Political Stability', 'political_stability'),
        ('GDP', 'gdp'),
        ('Inflation Rate', 'inflation_rate'),
        ('Corruption Index', 'corruption_index'),
        ('Government Debt to GDP', 'government_debt_to_gdp'),
        ('GDP per Capita PPP', 'gdp_per_capita_ppp'),
        ('Trade Balance', 'trade_balance_to_gdp'),
        ('Unemployment Rate', 'unemployment_rate'),
        ('Foreign Debt to GDP', 'foreign_debt_to_gdp'),
    )

    final_df = pd.DataFrame()
    final_df['Country Name'] = data_dict['political_stability']['Country Name']

    # Resolve all frames up front so a missing key fails before any averaging.
    frames = [(label, data_dict[key]) for label, key in labelled_sources]

    for label, frame in frames:
        numeric_part = frame.select_dtypes(include=[np.number])
        final_df[f'Average {label}'] = numeric_part.mean(axis=1)

    return final_df

def preprocess_data(X):
    """Mean-impute missing values, then log-transform GDP/inflation columns.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature table; passed to ``SimpleImputer(strategy='mean')``.

    Returns
    -------
    pandas.DataFrame
        Frame with the same columns and index as ``X``. Every column whose
        name contains ``'GDP'`` or ``'Inflation'`` is replaced by its natural
        log; when such a column's minimum is <= 0 it is first shifted so that
        the minimum maps to 1.

    Notes
    -----
    NOTE(review): the substring test also matches names such as
    ``'Average Government Debt to GDP'`` -- confirm that is intended.
    NOTE(review): the frame is rebuilt with ``X.columns``; verify behaviour
    when a column is entirely NaN, since the imputer may return fewer columns.
    """
    filled_values = SimpleImputer(strategy='mean').fit_transform(X)
    X_imputed = pd.DataFrame(filled_values, columns=X.columns, index=X.index)

    for column_name in X_imputed.columns:
        wants_log = 'GDP' in column_name or 'Inflation' in column_name
        if not wants_log:
            continue

        lowest = X_imputed[column_name].min()
        if lowest <= 0:
            # Shift so the smallest value becomes 1 and the log stays defined.
            X_imputed[column_name] = np.log(X_imputed[column_name] - lowest + 1)
        else:
            X_imputed[column_name] = np.log(X_imputed[column_name])

    return X_imputed

def build_model(X, y):
    """Fit a standardizer and a cross-validated ridge regression.

    Parameters
    ----------
    X : array-like
        Training features.
    y : array-like
        Training targets.

    Returns
    -------
    tuple
        ``(model, scaler)``: the fitted ``RidgeCV`` and the ``StandardScaler``
        fitted on ``X``. New data must go through ``scaler.transform`` before
        being passed to ``model``.
    """
    # Candidate regularization strengths: 13 log-spaced values from 1e-6 to 1e6.
    candidate_alphas = np.logspace(-6, 6, 13)

    scaler = StandardScaler()
    standardized = scaler.fit_transform(X)

    model = RidgeCV(alphas=candidate_alphas)
    model.fit(standardized, y)

    return model, scaler

# Load your data
data_dict = {
    'political_stability': pd.read_csv('X1_Political_processed_filtered.csv'),
    'gdp': pd.read_csv('X2_GDP_processed_filtered.csv'),
    'inflation_rate': pd.read_csv('X3_Inflation_processed_filtered.csv'),
    'corruption_index': pd.read_csv('X4_Corruption Index_processed_filtered.csv'),
    'government_debt_to_gdp': pd.read_csv('X5_Government Debt to GDP Ratio_processed_filtered.csv'),
    'gdp_per_capita_ppp': pd.read_csv('X6_data_GDP per Capita (PPP).csv'),
    'trade_balance_to_gdp': pd.read_csv('X7-Trade Balance to GDP Ratio_processed_filtered.csv'),
    'unemployment_rate': pd.read_csv('X8-Unemployment Rate_processed_filtered.csv'),
    'foreign_debt_to_gdp': pd.read_csv('X9_Foreign Debt to GDP Ratio_processed_filtered.csv')
}

# Prepare the data
final_data = prepare_data(data_dict)

# Select features
X = final_data[[
    'Average Political Stability',
    'Average GDP',
    'Average Inflation Rate',
    'Average Corruption Index',
    'Average Government Debt to GDP',
    'Average GDP per Capita PPP',
    'Average Trade Balance',
    'Average Unemployment Rate',
    'Average Foreign Debt to GDP'
]]

# Preprocess the data
# NOTE(review): preprocessing runs on all rows before the train/test split, so
# imputation means and log shifts also see the held-out rows.
X_processed = preprocess_data(X)

# If you have the actual credit ratings, load them here
# y = pd.read_csv('credit_ratings.csv')['Rating']

# For demonstration, let's create a dummy target variable
# Remove this when you have actual credit ratings
# Seeded generator so the dummy target (and every number printed below) is
# reproducible across runs. The target is pure noise, so coefficients near
# zero and an R² near zero are the expected outcome.
rng = np.random.default_rng(42)
y = rng.integers(1, 21, size=len(X))  # Assuming ratings from 1 to 20

# Split the data
X_train, X_test, y_train, y_test = train_test_split(
    X_processed, y, test_size=0.2, random_state=42
)

# Train the model
model, scaler = build_model(X_train, y_train)

# Scale the held-out features once and reuse them for prediction and scoring
X_test_scaled = scaler.transform(X_test)

# Make predictions
y_pred = model.predict(X_test_scaled)

# Print results
print("Model Coefficients:")
for feature, coef in zip(X.columns, model.coef_):
    print(f"{feature}: {coef:.4f}")

print("\nR² Score:", model.score(X_test_scaled, y_test))

# Create results DataFrame
# X_test.index holds index *labels*, so look countries up with .loc; .iloc
# would treat them as positions and only works for a default RangeIndex.
results = pd.DataFrame({
    'Country': final_data.loc[X_test.index, 'Country Name'],
    'Actual Rating': y_test,
    'Predicted Rating': y_pred
})

print("\nPredictions for some countries:")
print(results.head())

Model Coefficients:
Average Political Stability: 0.0000
Average GDP: -0.0000
Average Inflation Rate: -0.0000
Average Corruption Index: 0.0000
Average Government Debt to GDP: 0.0000
Average GDP per Capita PPP: 0.0000
Average Trade Balance: -0.0000
Average Unemployment Rate: 0.0000
Average Foreign Debt to GDP: 0.0000

R² Score: -0.0017642601059881802

Predictions for some countries:
      Country  Actual Rating  Predicted Rating
8   Indonesia             17          8.399895
16  Mauritius             14          8.400105
0    Bulgaria              1          8.400034
24     Zambia              1          8.399970
11     Jordan             13          8.400129


# **What We Should Do:**


**Based on Table 4 in the paper, we should use these coefficients for S&P ratings:**

In [2]:
def build_correct_model():
    """Return the hard-coded S&P regression coefficients (Table 4 of the paper).

    Returns
    -------
    dict
        Maps ``'Intercept'`` and each indicator name to its coefficient, in
        the order listed below.

    Notes
    -----
    NOTE(review): the GDP coefficient is recorded as 0.000 although it is
    described as "very small but significant"; at this precision it
    contributes nothing -- confirm the unrounded value against the paper.
    """
    table_4_rows = [
        ('Intercept', 13.584),
        ('Political Stability', 1.290),
        ('GDP', 0.000),                    # very small but significant
        ('Inflation Rate', -14.819),
        ('Corruption Index', 0.113),
        ('Government Debt to GDP', -2.191),
        ('GDP per Capita PPP', 0.000),     # not significant
        ('Trade Balance', -2.044),         # not significant
        ('Unemployment Rate', -24.262),
        ('Foreign Debt to GDP', 0.198),    # not significant
    ]
    return dict(table_4_rows)

def predict_rating(data, coefficients):
    """
    Calculate a rating from the log-linear equation:

        ln(R_t) = c + b1*X1 + b2*ln(X2) + b3*X3 + b4*ln(X4) + b5*X5
                    + b6*ln(X6) + b7*X7 + b8*X8 + b9*X9

    where the natural log is applied to GDP (X2), Corruption Index (X4) and
    GDP per Capita PPP (X6).

    Parameters
    ----------
    data : mapping
        Indexable by indicator name (dict, Series row or DataFrame).
    coefficients : mapping
        Must contain ``'Intercept'`` and one entry per indicator.

    Returns
    -------
    ``round(exp(ln R_t))``.

    Notes
    -----
    Terms whose coefficient is exactly zero are skipped instead of being
    multiplied out. Previously a zero-weight log term evaluated on a
    non-positive value (e.g. negative GDP growth) produced ``0 * NaN`` or
    ``0 * -inf`` and turned the whole prediction into NaN. For inputs where
    every term is finite the result is unchanged.
    A log term with a non-zero coefficient still yields NaN for non-positive
    inputs.
    """
    # Indicators that enter the equation through their natural log.
    log_features = {'GDP', 'Corruption Index', 'GDP per Capita PPP'}

    # Same left-to-right summation order as the written equation.
    feature_order = (
        'Political Stability',
        'GDP',
        'Inflation Rate',
        'Corruption Index',
        'Government Debt to GDP',
        'GDP per Capita PPP',
        'Trade Balance',
        'Unemployment Rate',
        'Foreign Debt to GDP',
    )

    log_rating = coefficients['Intercept']
    for feature in feature_order:
        weight = coefficients[feature]
        if weight == 0:
            # A zero weight means the indicator is excluded; do not let an
            # undefined log of its value poison the sum.
            continue
        values = data[feature]
        if feature in log_features:
            values = np.log(values)
        log_rating = log_rating + weight * values

    # Convert from ln(Rating) to Rating
    rating = np.exp(log_rating)
    return round(rating)

# **Implement New Model**

In [12]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer

def prepare_data(data_dict, year='2017'):
    """Build one row per country holding a single year's value per indicator.

    Parameters
    ----------
    data_dict : dict
        Maps indicator keys (``'political_stability'``, ``'gdp'``, ...) to
        DataFrames. The ``'political_stability'`` frame must carry a
        ``'Country Name'`` column.
    year : str or int, default '2017'
        Label of the column to read from every indicator frame. It is
        converted with ``str`` before the lookup.

    Returns
    -------
    pandas.DataFrame
        ``'Country Name'`` plus one float column per indicator that has the
        requested year column. Indicators lacking that column are left out
        and reported in a printed warning, so the gap is visible here rather
        than only when a later step looks for the column.

    Notes
    -----
    Columns are assigned by index, so rows are combined by index label.
    NOTE(review): this presumably relies on every file listing the same
    countries in the same order -- confirm against the CSVs.
    """
    year = str(year)

    # Combine all indicators into one DataFrame
    final_df = pd.DataFrame()
    final_df['Country Name'] = data_dict['political_stability']['Country Name']

    # Define the mapping between file columns and required names
    column_mapping = {
        'political_stability': 'Political Stability',
        'gdp': 'GDP',
        'inflation_rate': 'Inflation Rate',
        'corruption_index': 'Corruption Index',
        'government_debt_to_gdp': 'Government Debt to GDP',
        'gdp_per_capita_ppp': 'GDP per Capita PPP',
        'trade_balance_to_gdp': 'Trade Balance',
        'unemployment_rate': 'Unemployment Rate',
        'foreign_debt_to_gdp': 'Foreign Debt to GDP'
    }

    # Pull the requested year's value for each indicator
    missing_indicators = []
    for file_name, target_name in column_mapping.items():
        df = data_dict[file_name]
        if year in df.columns:
            final_df[target_name] = df[year].astype(float)
        else:
            missing_indicators.append(target_name)

    if missing_indicators:
        print(f"Warning: no '{year}' column for: {', '.join(missing_indicators)}")

    # Print debug information
    print("Available columns:", final_df.columns.tolist())
    print("\nFirst few rows of data:")
    print(final_df.head())

    return final_df

def preprocess_features(df):
    """Return a copy of ``df`` with numeric NaNs replaced by the column mean.

    Parameters
    ----------
    df : pandas.DataFrame
        Input table; it is not modified.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` in which every numeric column is cast to float and its
        missing values are filled with that column's mean. Non-numeric
        columns are left untouched.

    Notes
    -----
    The fill is done with pandas rather than by assigning a ``SimpleImputer``
    result back into the frame. That assignment fails when a numeric column
    is entirely NaN (the imputer drops such columns by default, so the shapes
    no longer match) or when there are no numeric columns. Here an all-NaN
    column simply stays NaN, and a frame without numeric columns is returned
    as-is.
    """
    # Create a copy to avoid modifying original data
    data = df.copy()

    numeric_cols = data.select_dtypes(include=[np.number]).columns
    if len(numeric_cols) == 0:
        return data

    # Handle missing values: per-column mean, computed ignoring NaNs
    numeric_part = data[numeric_cols].astype(float)
    data[numeric_cols] = numeric_part.fillna(numeric_part.mean())

    return data

def predict_rating(data, coefficients):
    """Evaluate the linear rating equation for one row (or a whole frame).

    The result is the intercept plus, for each indicator below, its
    coefficient times the indicator value. Every indicator except
    ``'Corruption Index'`` is divided by 100 first (presumably converting
    percentages to fractions -- confirm against the data files). ``'GDP'``
    and ``'GDP per Capita PPP'`` are not used.

    Parameters
    ----------
    data : mapping
        Indexable by indicator name.
    coefficients : mapping
        Must contain ``'Intercept'`` and every indicator used.

    Returns
    -------
    The computed rating, or ``np.nan`` if any exception occurs; in that case
    the error message is printed.
    """
    # Order matters only for reproducing the same left-to-right float sum.
    term_order = (
        'Political Stability',
        'Inflation Rate',
        'Corruption Index',
        'Government Debt to GDP',
        'Trade Balance',
        'Unemployment Rate',
        'Foreign Debt to GDP',
    )

    try:
        total = coefficients['Intercept']
        for term in term_order:
            weight = coefficients[term]
            if term == 'Corruption Index':
                # Used on its original scale.
                total = total + weight * data[term]
            else:
                total = total + weight * (data[term] / 100)
        return total

    except Exception as e:
        print(f"Error in prediction: {e}")
        return np.nan

# Load data
data_dict = {
    'political_stability': pd.read_csv('X1_Political_processed_filtered.csv'),
    'gdp': pd.read_csv('X2_GDP_processed_filtered.csv'),
    'inflation_rate': pd.read_csv('X3_Inflation_processed_filtered.csv'),
    'corruption_index': pd.read_csv('X4_Corruption Index_processed_filtered.csv'),
    'government_debt_to_gdp': pd.read_csv('X5_Government Debt to GDP Ratio_processed_filtered.csv'),
    'gdp_per_capita_ppp': pd.read_csv('X6_data_GDP per Capita (PPP).csv'),
    'trade_balance_to_gdp': pd.read_csv('X7-Trade Balance to GDP Ratio_processed_filtered.csv'),
    'unemployment_rate': pd.read_csv('X8-Unemployment Rate_processed_filtered.csv'),
    'foreign_debt_to_gdp': pd.read_csv('X9_Foreign Debt to GDP Ratio_processed_filtered.csv')
}

# Define coefficients from the paper (Table 4)
sp_coefficients = {
    'Intercept': 13.584,
    'Political Stability': 1.290,  # X1
    'GDP': 0.000,  # X2
    'Inflation Rate': -14.819,  # X3
    'Corruption Index': 0.113,  # X4
    'Government Debt to GDP': -2.191,  # X5
    'GDP per Capita PPP': 0.000,  # X6
    'Trade Balance': -2.044,  # X7
    'Unemployment Rate': -24.262,  # X8
    'Foreign Debt to GDP': 0.198  # X9
}

# Actual 2017 rating for Kuwait on the numeric scale used in this notebook,
# kept in one place so the printed value and the difference cannot drift apart.
KUWAIT_ACTUAL_RATING_2017 = 21

# How many rows of input to echo for verification; printing every country
# buries the results under a wall of text.
PREVIEW_ROWS = 5

# Prepare and preprocess data
final_data = prepare_data(data_dict)
processed_data = preprocess_features(final_data)

# Print input data for verification (first PREVIEW_ROWS rows only)
feature_cols = [col for col in processed_data.columns if col != 'Country Name']
for idx, row in processed_data.head(PREVIEW_ROWS).iterrows():
    print("\nInput data for prediction:")
    for col in feature_cols:
        print(f"{col}: {row[col]}")

# Make predictions for all countries at once: predict_rating only indexes
# `data` by column name and does arithmetic, so it works on the whole frame
# and returns one value per row.
predictions = predict_rating(processed_data, sp_coefficients)

# Create results DataFrame
results = pd.DataFrame({
    'Country': final_data['Country Name'],
    'Predicted Rating': predictions
})

# Round predictions to 2 decimal places
results['Predicted Rating'] = results['Predicted Rating'].round(2)

# Sort by predicted rating in descending order
results_sorted = results.sort_values('Predicted Rating', ascending=False)

# Print results
print("\nPredictions:")
print(results_sorted)

# For Kuwait specifically
kuwait_results = results[results['Country'] == 'Kuwait']
if not kuwait_results.empty:
    kuwait_pred = kuwait_results['Predicted Rating'].values[0]
    print("\nKuwait Analysis:")
    print(f"Predicted Rating: {kuwait_pred:.2f}")
    print(f"Actual Rating (2017): {KUWAIT_ACTUAL_RATING_2017}")
    print(f"Difference: {KUWAIT_ACTUAL_RATING_2017 - kuwait_pred:.2f} notches")
else:
    print("\nKuwait not found in the dataset")

# Save results
results.to_csv('credit_rating_predictions.csv', index=False)

Available columns: ['Country Name', 'Political Stability', 'GDP', 'Inflation Rate', 'Corruption Index', 'Government Debt to GDP', 'GDP per Capita PPP', 'Trade Balance', 'Unemployment Rate', 'Foreign Debt to GDP']

First few rows of data:
  Country Name  Political Stability       GDP  Inflation Rate  \
0     Bulgaria            59.047619  2.745927        2.061596   
1      Belarus            43.333332  2.531624        6.031837   
2       Belize            48.095238 -1.813365        1.147653   
3       Brazil            29.523809  1.322869        3.446373   
4       Bhutan            88.095238  3.507567        4.955084   

   Corruption Index  Government Debt to GDP  GDP per Capita PPP  \
0         -0.195962               32.235054        21455.875500   
1         -0.240941               39.880180        18413.712740   
2         -0.305804               46.799538         9086.911909   
3         -0.559336               83.669203        14293.604640   
4          1.529063               95

# **Result: The Model Performs Very Poorly**