In [16]:
import pandas as pd
import numpy as np
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.preprocessing import LabelEncoder, RobustScaler
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the dataset.
# The CSV has lived both next to this script and under data/, so each location
# is tried in turn instead of failing on the first miss.
DATASET_CANDIDATES = ("AllYears_Dataframe.csv", "data/AllYears_Dataframe.csv")
for dataset_path in DATASET_CANDIDATES:
    try:
        df = pd.read_csv(dataset_path)
        break
    except FileNotFoundError:
        continue
else:
    # Neither location exists: fail with a message that lists what was tried.
    raise FileNotFoundError(
        "AllYears_Dataframe.csv not found; looked in: " + ", ".join(DATASET_CANDIDATES)
    )

# Drop unnecessary columns (presumably index columns left over from earlier
# CSV round-trips). errors='ignore' keeps the load working when a cleaned file
# no longer contains them.
df = df.drop(columns=['Unnamed: 0.1', 'Unnamed: 0'], errors='ignore')

# Assign a letter rating from a weighted sum of five financial ratios (Z-score)
def rating_affectation(row):
    """Return the letter rating for one record of raw financial data.

    ``row`` must support ``row[key]`` lookups (a pandas row or a plain mapping)
    for the keys read below. Five ratios are combined into a single score,
    which is then mapped onto a rating by descending thresholds.

    Returns one of 'AAA', 'AA', 'A', 'BBB', 'BB', 'B', 'CCC' or 'D'.
    """
    working_capital_to_assets = (
        row['AssetsCurrent_Totall'] - row['LiabilitiesCurrent_Totall']
    ) / row['assets']
    net_income_to_assets = row['NetIncomeLoss'] / row['assets']
    operating_income_to_assets = row['OperatingIncomeLoss'] / row['assets']
    equity_to_liabilities = row['StockholdersEquity'] / row['Liabilities']
    revenue_to_assets = row['Revenues_Totall3'] / row['assets']

    score = (
        1.2 * working_capital_to_assets
        + 1.4 * net_income_to_assets
        + 3.3 * operating_income_to_assets
        + 0.6 * equity_to_liabilities
        + 0.999 * revenue_to_assets
    )

    # Minimum score for each rating, strongest first; the first floor that the
    # score reaches wins.
    rating_floors = (
        (6.2, 'AAA'),
        (4.7, 'AA'),
        (3.7, 'A'),
        (2.8, 'BBB'),
        (2.4, 'BB'),
        (1.8, 'B'),
        (0.3, 'CCC'),
    )
    for floor, label in rating_floors:
        if score >= floor:
            return label

    # Everything below 0.3 is rated 'D'. A NaN score also ends up here because
    # every comparison against NaN is false.
    return 'D'

# Build the model's feature table (financial ratios plus a few raw amounts)
# Short aliases for the raw statement columns that appear in several ratios.
total_assets = df['assets']
equity = df['StockholdersEquity']
liabilities = df['Liabilities']
revenue = df['Revenues_Totall3']
cash = df['CashTotall']
net_income = df['NetIncomeLoss']
operating_income = df['OperatingIncomeLoss']
current_assets = df['AssetsCurrent_Totall']
current_liabilities = df['LiabilitiesCurrent_Totall']
debt_current = df['DebtCurrent_Totall']
debt_long_term = df['LongTermDebt_Totall']
total_debt = debt_current + debt_long_term

# NOTE(review): R14 is computed exactly like R12 (NetIncomeLoss / assets), so
# the two columns are identical - confirm whether another ratio was intended.
# NOTE(review): 'rating' is derived from the raw df before any imputation, so
# rows whose score is NaN are labelled 'D' by rating_affectation - confirm
# that this is intended.
df_new = pd.DataFrame({
    'R1': debt_current / equity,
    'R2': equity / liabilities,
    'R3': liabilities / total_assets,
    'R4': equity / total_assets,
    'R5': total_assets / equity,
    'R6': cash / total_assets,
    'R7': (current_assets - current_liabilities) / total_assets,
    'R8': cash / revenue,
    'R9': df['IntangibleAssetsNetExcludingGoodwill'] / total_assets,
    'R10': current_assets / current_liabilities,
    'R11': operating_income / revenue,
    'R12': net_income / total_assets,
    'R13': net_income / revenue,
    'R14': net_income / total_assets,
    'R15': df['GrossProfit_Totall'] / revenue,
    'R16': operating_income / df['InterestExpense_Totall'],
    'R17': debt_long_term / total_assets,
    'R18': total_debt / revenue,
    'R19': total_debt / (cash + total_assets),
    'R23': df['AccountsPayableAndAccruedLiabilitiesCurrent_Totall'] / revenue,
    # Absolute amounts carried over unchanged.
    'AV3': cash,
    'AV4': df['NetCashProvidedByUsedInOperatingActivities'],
    'AV5': df['NetCashProvidedByUsedInInvestingActivities'],
    'AV6': df['NetCashProvidedByUsedInFinancingActivities'],
    'Is_Bankrupt': df['Is_Bankrupt'],
    # Target label: one rating per source row.
    'rating': df.apply(rating_affectation, axis=1),
})

# Clean the feature table: infinities produced by the ratio divisions are
# turned into NaN so that they are treated as missing values, then every
# numeric column is imputed.
infinite_markers = [np.inf, -np.inf]
df_new.replace(infinite_markers, np.nan, inplace=True)

# Only numeric columns go through the imputer; the string 'rating' column is
# not selected and stays as it is.
numeric_cols = df_new.select_dtypes(include=[np.number]).columns
imputer = IterativeImputer(random_state=42)
imputed_values = imputer.fit_transform(df_new[numeric_cols])
df_new[numeric_cols] = imputed_values

# Prepare the dataset for model training.
# 'rating' is the target. 'Is_Bankrupt' is an outcome label rather than a
# financial feature, and the prediction inputs (see input_data) never supply
# it, so it is excluded from the feature matrix as well.
X = df_new.drop(columns=['rating', 'Is_Bankrupt'])
y = df_new['rating']

# Split the data into training and testing sets BEFORE oversampling.
# Resampling first would leak information: SMOTE interpolates between
# neighbouring rows, so synthetic training rows could be built from rows that
# later land in the test set, and the test set itself would contain synthetic
# rows. stratify=y keeps every rating class represented in both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Apply SMOTE for class balancing, on the training split only. The test split
# keeps its real, imbalanced class distribution.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train, y_train)

# Train the Decision Tree Classifier on the balanced training data
model = DecisionTreeClassifier(random_state=42)
model.fit(X_resampled, y_resampled)

# Evaluate the trained model on the test split: overall accuracy, per-class
# precision/recall/F1, and the confusion matrix.
y_pred = model.predict(X_test)

test_accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {test_accuracy:.2f}")

per_class_report = classification_report(y_test, y_pred)
print(per_class_report)

class_confusion = confusion_matrix(y_test, y_pred)
print(class_confusion)

# Human-readable bankruptcy risk for each rating the model can predict.
# The rating function labels the second-lowest bucket 'CCC', so that key must
# be present here; without it every 'CCC' prediction is reported as an unknown
# rating. 'C' is kept so existing lookups with that key still work.
rating_to_risk = {
    'AAA': 'Very Low Risk',
    'AA': 'Low Risk',
    'A': 'Moderate Risk',
    'BBB': 'Increased Risk',
    'BB': 'High Risk',
    'B': 'Very High Risk',
    'CCC': 'Severe Risk',
    'C': 'Severe Risk',
    'D': 'Bankrupt',
}

def predict_bankruptcy(input_data):
    """Predict the rating and bankruptcy risk for a single company.

    ``input_data`` maps feature names to values. It is laid out as a one-row
    frame using the column order of the module-level training matrix ``X``;
    features absent from ``input_data`` end up as NaN and keys that are not
    training columns are ignored.

    Prints the predicted rating and its risk description, and returns them as
    a ``(rating, bankruptcy_risk)`` tuple. Ratings without an entry in
    ``rating_to_risk`` are reported as "Unknown Rating".
    """
    feature_row = pd.DataFrame([input_data], columns=X.columns)
    rating = model.predict(feature_row)[0]
    bankruptcy_risk = rating_to_risk.get(rating, "Unknown Rating")
    print(
        f"Predicted Rating: {rating}",
        f"Bankruptcy Risk: {bankruptcy_risk}",
        sep="\n",
    )
    return rating, bankruptcy_risk

# Example input for a single prediction, grouped the same way as the feature
# table: the ratio features first, then the absolute amounts.
example_ratios = {
    'R1': 1.5,
    'R2': 0.28,
    'R3': 1.4,
    'R4': 0.4,
    'R5': 2.5,
    'R6': 0.1,
    'R7': 0.1,
    'R8': 0.25,
    'R9': 0.6,
    'R10': 0.16,
    'R11': 1.5,
    'R12': 0.2,
    'R13': 0.5,
    'R14': 0.2,
    'R15': 0.25,
    'R16': 0.6,
    'R17': 0.8,
    'R18': 3.5,
    'R19': 1.2,
    'R23': 2,
}
example_amounts = {
    'AV3': 5000,
    'AV4': 1000,
    'AV5': 100,
    'AV6': 20000,
}
input_data = {**example_ratios, **example_amounts}

# Run the example through the trained model; prints the rating and risk.
predict_bankruptcy(input_data)


FileNotFoundError: [Errno 2] No such file or directory: 'AllYears_Dataframe.csv'