# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.preprocessing import LabelEncoder
import seaborn as sns
import matplotlib.pyplot as plt
import pickle

# Step 1: Load the CSV file
file_path = '/kaggle/input/green-bond-synthetically-generated-dataset-1/synthetic_green_bond_data_with_real_issuers (2).csv'
data = pd.read_csv(file_path)

# Display the first few rows of the dataset
print(data.head())

# Step 2: Handling Missing Values
# Any row with a missing value in any column is removed. This runs before the
# column drop in Step 3, so gaps in those columns also remove rows.
data = data.dropna()

# Step 3: Drop specified columns
columns_to_drop = ['Issuer_Location', 'Issuer_Sector', 'Credit_Rating']
data_dropped = data.drop(columns=columns_to_drop)

# Step 4: Label Encode 'Risk_Involved' Feature
# The fitted encoder is kept so predictions can be mapped back to the
# original category labels later on.
label_encoder = LabelEncoder()
data_dropped['Risk_Involved'] = label_encoder.fit_transform(data_dropped['Risk_Involved'])

# Step 5: Prepare the data for modeling
X = data_dropped.drop('Risk_Involved', axis=1)
y = data_dropped['Risk_Involved']
# 'Issuer_Name' stays in X but is excluded from the model inputs; it is only
# used to look bonds up by name.
X_train, X_test, y_train, y_test = train_test_split(X.drop(columns=['Issuer_Name']), y, test_size=0.2, random_state=42)

# Step 6: Train the RandomForestClassifier model
rf_classifier = RandomForestClassifier(random_state=42)
rf_classifier.fit(X_train, y_train)

# Step 7: Make predictions and evaluate the model
y_pred = rf_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:")
print(classification_report(y_test, y_pred))
print("Confusion Matrix:")
# Pin the label set to every encoded class (LabelEncoder yields 0..n_classes-1).
# Otherwise confusion_matrix infers labels from y_test/y_pred only, and a class
# missing from the test split would shrink the matrix so it no longer lines up
# with the label_encoder.classes_ tick labels used in the heatmap below.
class_ids = list(range(len(label_encoder.classes_)))
conf_matrix = confusion_matrix(y_test, y_pred, labels=class_ids)
print(conf_matrix)

# Step 8: Plot the confusion matrix
plt.figure(figsize=(8, 6))
sns.heatmap(conf_matrix, annot=True, fmt="d", cmap="Blues", xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix')
plt.show()

# Step 9: Look up a bond by issuer name and predict its risk category
def predict_risk_for_bond(bond_name):
    """Return the predicted risk category for the bond named ``bond_name``.

    Rows of the module-level ``data_dropped`` table whose 'Issuer_Name'
    equals ``bond_name`` exactly are fed to the trained ``rf_classifier``;
    the encoded predictions are translated back to category labels with
    ``label_encoder``.

    Args:
        bond_name: Value to match against the 'Issuer_Name' column.

    Returns:
        The decoded risk category of the first matching row, or ``None``
        (after printing a notice) when no row matches.
    """
    name_matches = data_dropped['Issuer_Name'] == bond_name
    matching_rows = data_dropped[name_matches]

    if not matching_rows.empty:
        # Remove the target and the lookup key so only model inputs remain.
        model_inputs = matching_rows.drop(['Risk_Involved', 'Issuer_Name'], axis=1)
        encoded_predictions = rf_classifier.predict(model_inputs)
        risk_labels = label_encoder.inverse_transform(encoded_predictions)
        # Every matching row is predicted; only the first result is reported.
        return risk_labels[0]

    print("Bond name not found in the dataset.")
    return None

# Step 10: Ask the user for a bond name and report its predicted risk
bond_name = input("Enter the bond name: ")
predicted_risk = predict_risk_for_bond(bond_name)
# None means the lookup found nothing; the helper has already printed a notice.
if predicted_risk is not None:
    print(f"The predicted risk for the bond '{bond_name}' is: {predicted_risk}")

# Step 11: Persist the model bundle as a pickle file
# The bundle carries the classifier, the fitted label encoder, and the ordered
# list of feature column names (everything in X except 'Issuer_Name').
model_data = dict(
    model=rf_classifier,
    label_encoder=label_encoder,
    features=X.drop('Issuer_Name', axis=1).columns.tolist(),
)

with open('green_bond_risk_model.pkl', 'wb') as file:
    pickle.dump(model_data, file)

print("Model saved as 'green_bond_risk_model.pkl'")

# Step 12: Function to load the model and make predictions
def load_model_and_predict(bond_data, model_path='green_bond_risk_model.pkl'):
    """Load a pickled model bundle and predict the risk category for bond data.

    The bundle is expected to be a dict with the keys 'model',
    'label_encoder' and 'features', matching what Step 11 of this script
    writes.

    Args:
        bond_data: Table-like object that can be indexed with a list of
            column names (e.g. a pandas DataFrame) and that contains every
            column listed under the bundle's 'features' key. Extra columns
            are ignored.
        model_path: Path of the pickle file to load. Defaults to the file
            name written in Step 11.

    Returns:
        The decoded risk category for the first row of ``bond_data``.
    """
    # SECURITY: pickle.load can execute arbitrary code while deserializing;
    # only point model_path at bundles that come from a trusted source.
    with open(model_path, 'rb') as file:
        model_data = pickle.load(file)

    model = model_data['model']
    label_encoder = model_data['label_encoder']
    features = model_data['features']

    # Select the training-time feature columns, in their stored order.
    bond_features = bond_data[features]
    predicted_risk = model.predict(bond_features)
    predicted_risk_category = label_encoder.inverse_transform(predicted_risk)
    # Only the first row's prediction is returned.
    return predicted_risk_category[0]
