# dataset exploration

In [None]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
import numpy as np
import geopandas as gpd
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [None]:
# Load the bankruptcy dataset from the Kaggle input directory into df.
df=pd.read_csv('/kaggle/input/american-companies-bankruptcy-prediction-dataset/american_bankruptcy.csv')
# Preview the first rows.
df.head()

In [None]:
# Map the anonymous X1..X18 column codes to descriptive financial names.
column_names = {
    'X1': 'Current assets',
    'X2': 'Cost of goods sold',
    'X3': 'Depreciation and amortization',
    'X4': 'EBITDA',
    'X5': 'Inventory',
    'X6': 'Net Income',
    'X7': 'Total Receivables',
    'X8': 'Market value',
    'X9': 'Net sales',
    'X10': 'Total assets',
    'X11': 'Total Long-term debt',
    'X12': 'EBIT',
    'X13': 'Gross Profit',
    'X14': 'Total Current Liabilities',
    'X15': 'Retained Earnings',
    'X16': 'Total Revenue',
    'X17': 'Total Liabilities',
    'X18': 'Total Operating Expenses',
}
# Rename in place so every later cell sees the readable names.
df.rename(columns=column_names, inplace=True)

In [None]:
# Preview the frame again, now with the descriptive column names.
df.head()

In [None]:
# Column dtypes, non-null counts and memory usage.
df.info()

In [None]:
# Summary statistics, transposed so each column becomes one row.
df.describe().T

In [None]:
# Summary (count, unique, top, freq) of the object-typed columns.
df.describe(include='object')

In [None]:
# Number of missing values per column.
df.isnull().sum()

In [None]:
# Number of fully duplicated rows.
df.duplicated().sum()

In [None]:
# Distinct values of the target column.
df['status_label'].unique()

In [None]:
# Independent copy of df without the company identifier; df itself is untouched.
df_numeric = df.drop(columns=['company_name']).copy()

In [None]:
def binaryclass(row):
    """Encode a row's status as a binary target.

    Returns 1 when ``row['status_label']`` equals ``'alive'`` and 0 for any
    other value.
    """
    return 1 if row['status_label'] == 'alive' else 0
# Encode the target column-wise: 1 where the status is 'alive', 0 otherwise.
df_numeric['status_label'] = (df['status_label'] == 'alive').astype(int)
df_numeric.head()

# EDA

In [None]:
# Pairwise correlations between all columns of df_numeric.
corr = df_numeric.corr()
axis_labels = corr.columns

# A large canvas keeps the per-cell annotations legible.
plt.figure(figsize=(30, 30))
sns.heatmap(
    corr,
    annot=True,
    cmap='coolwarm',
    linewidths=.5,
    xticklabels=axis_labels,
    yticklabels=axis_labels,
)
plt.title('Correlation Matrix')
plt.show()

In [None]:
# Years to mark on the x-axis.
year_ticks = [2000, 2005, 2008, 2010, 2012, 2014, 2016, 2018]
plt.xticks(year_ticks)

# Mean gross profit per year as a line plot.
gross_profit_by_year = df_numeric.groupby(['year'])['Gross Profit'].mean()
gross_profit_by_year.plot()

In [None]:
# Average every column of df_numeric per year.
df_means = df_numeric.groupby('year').mean()

# Lay the features out on a grid of at most 5 columns. Ceiling division gives
# the exact number of rows, so no spare empty row appears when the feature
# count is a multiple of 5.
n_features = len(df_means.columns)
n_cols = max(1, min(5, n_features))
n_rows = max(1, -(-n_features // n_cols))

# squeeze=False keeps `axes` two-dimensional even for a single row or column.
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 10), squeeze=False)
flat_axes = axes.ravel()

# One line chart per feature, filling the grid row by row.
for ax, feature_name in zip(flat_axes, df_means.columns):
    df_means[feature_name].plot(kind='line', ax=ax)
    ax.set_title(feature_name)

# Hide the grid cells that did not receive a feature.
for ax in flat_axes[n_features:]:
    ax.set_visible(False)

plt.tight_layout()
plt.show()

## decrease in all revenues in 2009
* 2007-2008: The housing bubble in the US began to burst, leading to defaults on mortgages and losses for financial institutions. This triggered a domino effect, causing a credit freeze and a decline in overall economic activity.
* 2008: The crisis reached its peak, with major financial institutions facing collapse and government intervention needed to stabilize the financial system.
* 2009 and beyond: The recession officially began in December 2007 (according to the National Bureau of Economic Research) and continued through much of 2009. The effects of the crisis, including decreased consumer spending and business investment, were felt throughout this period and even in later years.
### Therefore, it's very likely that the profit deficits observed for the year 2009 are a consequence of the 2008 financial crisis. The crisis had a significant impact on businesses across various sectors, leading to decreased sales, increased expenses, and ultimately, losses in many cases.




## The Treasury recently reported that the federal government recorded a total budget deficit of \\$1.4 trillion in fiscal year 2009, about \\$960 billion more than the deficit incurred in 2008. CBO notes, in its latest Monthly Budget Review, that the federal deficit rose as a share of the nation's gross domestic product (GDP) from 3.1 percent in 2008 to 9.9 percent in 2009, the highest deficit as a share of GDP since 1945.
[www.cbo.gov/publication/24992](https://www.cbo.gov/publication/24992)

In [None]:
# Five companies with the highest mean net income across their records.
mean_net_income = df.groupby(['company_name'])['Net Income'].mean()
top_net_income = mean_net_income.sort_values(ascending=False).head()
top_net_income.plot(kind='bar')

In [None]:
# Five companies with the highest mean total revenue across their records.
mean_total_revenue = df.groupby(['company_name'])['Total Revenue'].mean()
top_total_revenue = mean_total_revenue.sort_values(ascending=False).head()
top_total_revenue.plot(kind='bar')

In [None]:
# One box plot per column of df_numeric on a 4 x 5 grid, to inspect spread and outliers.
df_numeric.plot(kind = "box" , subplots = True , figsize = (20,20) , layout = (4,5))
plt.show()

* We have a lot of outliers.
* Outliers will not be removed, because when I did so, there were no bankrupt companies left in the dataset. So I preferred to keep all the data, since the outliers may carry important information about the bankrupt class (encoded as 0 in `status_label`).

In [None]:
# Every non-object column of df is treated as a numeric feature to plot.
numeric_features = [col for col in df.columns if df[col].dtype != object]

# Fixed 4 x 5 grid; features beyond its 20 cells are not drawn.
n_rows = 4
n_cols = 5

fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 10))

# One scatter per feature against the year, coloured by the binary status
# label taken from df_numeric. The last scatter is kept so the colorbar has
# something to describe.
points = None
for ax, feature in zip(axes.ravel(), numeric_features):
    points = ax.scatter(df['year'], df[feature], c=df_numeric['status_label'], cmap='cool', alpha=0.3)

    # Reference line separating positive from negative values.
    ax.axhline(0, c='black', ls='--')

    ax.set_xlabel('Year')
    ax.set_ylabel(feature + ' Value')
    ax.set_title(feature + ' vs. Year')

# Adjust layout to prevent overlapping elements.
plt.tight_layout()

# fig.colorbar() requires the mappable it describes as its first argument;
# calling it with only label/ax raises a TypeError. All scatters share the
# same colour data and colormap, so any one of them can serve as the mappable.
if points is not None:
    fig.colorbar(points, label='Status Label', ax=axes.ravel())

plt.show()

# preprocessing

In [None]:
# For every row, the number of distinct status labels recorded for its company.
labels_per_company = df.groupby(['company_name'])['status_label'].transform('nunique')

# Keep only the rows of companies that appear with more than one status.
df2 = df[labels_per_company > 1]
df2

## there are no companies with the same name that changed status
### i.e., no alive company later turned bankrupt, and no bankrupt company later became active again

In [None]:
# Number of distinct company names in the dataset.
df['company_name'].nunique()

## number of actual companies

In [None]:
from sklearn.cluster import KMeans
from yellowbrick.cluster import KElbowVisualizer
# Cluster on three columns only; X holds their values as a NumPy array and is
# reused by the clustering cells that follow.
features = ['Net Income', 'Market value', 'Total Revenue']
X = df[features].values

# Instantiate the clustering model and visualizer
km = KMeans(random_state=42)
# k=(2,10) is the range of cluster counts handed to the elbow visualizer.
visualizer = KElbowVisualizer(km, k=(2,10))
 
visualizer.fit(X)        # Fit the data to the visualizer
visualizer.show() 

In [None]:
# Fit a 4-cluster k-means model on X and get one cluster id per row.
kmeans = KMeans(n_clusters=4, init='k-means++', random_state=42)
y_kmeans = kmeans.fit_predict(X)

# Plot the first two feature columns of X, one colour per cluster.
cluster_colors = ['red', 'blue', 'green', 'cyan']
for cluster_id, color in enumerate(cluster_colors):
    members = X[y_kmeans == cluster_id]
    plt.scatter(members[:, 0], members[:, 1], s=100, c=color,
                label='Cluster {}'.format(cluster_id + 1))

# The data are companies, and the axes are the first two entries of `features`
# (the columns X was built from); the earlier customer/income/spending-score
# labels described a different dataset.
plt.title('Clusters of companies')
plt.xlabel(features[0])
plt.ylabel(features[1])
plt.legend()
plt.show()

In [None]:
import plotly.graph_objects as go
# Axis titles for the 3-D scene, one per column of X.
Scene = dict(xaxis = dict(title  = 'Net Income -->'),yaxis = dict(title  = 'Market Value--->'),zaxis = dict(title  = 'Total Revenue-->'))

# Cluster id of every row, taken from the fitted k-means model.
labels = kmeans.labels_
# One marker per row of X, coloured by its cluster id.
trace = go.Scatter3d(x=X[:, 0], y=X[:, 1], z=X[:, 2], mode='markers',marker=dict(color = labels, size= 10, line=dict(color= 'black',width = 10)))
layout = go.Layout(margin=dict(l=0,r=0),scene = Scene,height = 800,width = 800)
data = [trace]
fig = go.Figure(data = data, layout = layout)
fig.show()

### tried clustering

In [None]:
# Class balance of the encoded target in df_numeric, shown as percentages.
# Labels are given in the order of the grouped keys (0 first, then 1).
class_counts = df_numeric.groupby('status_label').size()
class_counts.plot(
    kind='pie',
    labels=['bankrupt', 'not bankrupt'],
    colors=['skyblue', 'tomato'],
    autopct='%.1f%%',
    fontsize=13,
)

In [None]:
from imblearn.over_sampling import SMOTE

# Separate df_numeric into predictors and target.
smote_features = df_numeric.drop(columns='status_label')
smote_target = df_numeric['status_label']

# Oversample only the minority class; the seed keeps the result repeatable.
sm = SMOTE(sampling_strategy='minority', random_state=42)
oversampled_X, oversampled_Y = sm.fit_resample(smote_features, smote_target)

# Reassemble one frame with the target first, followed by the predictors.
oversampled = pd.concat(
    [pd.DataFrame(oversampled_Y), pd.DataFrame(oversampled_X)],
    axis=1,
)

In [None]:
# Class balance after SMOTE oversampling, shown as percentages.
# Labels are given in the order of the grouped keys (0 first, then 1).
oversampled_counts = oversampled.groupby('status_label').size()
oversampled_counts.plot(
    kind='pie',
    labels=['bankrupt', 'not bankrupt'],
    colors=['skyblue', 'tomato'],
    autopct='%.1f%%',
    fontsize=13,
)

In [None]:
 # Number of rows (company-year records) per status label in the original frame.
 df.status_label.value_counts()

In [None]:
# One row per company, holding the first status label recorded for it.
df2 = df.groupby('company_name')[['status_label']].first()

In [None]:
# Number of companies per status label (df2 has one row per company).
df2.status_label.value_counts()

In [None]:
# Keep every record whose status label is 'failed' (no de-duplication here).
failed_companies = df[df['status_label'] == 'failed']

# For records labelled 'alive', keep only the first row per company_name.
alive_companies = df[df['status_label'] == 'alive'].drop_duplicates(subset='company_name')

# Combine the de-duplicated alive rows with all failed rows, renumbering the index.
all_companies = pd.concat([alive_companies, failed_companies], ignore_index=True)

# Class counts of the combined frame.
all_companies.status_label.value_counts()

In [None]:
# Replace the text status with the binary code produced by binaryclass, row by row.
all_companies['status_label']=all_companies.apply(binaryclass,axis=1)

In [None]:
# Preview the combined frame after encoding the target.
all_companies.head()

In [None]:
# Class balance of the combined all_companies frame, shown as percentages.
# Labels are given in the order of the grouped keys (0 first, then 1).
combined_counts = all_companies.groupby('status_label').size()
combined_counts.plot(
    kind='pie',
    labels=['bankrupt', 'not bankrupt'],
    colors=['skyblue', 'tomato'],
    autopct='%.1f%%',
    fontsize=13,
)

In [None]:
# Pairwise scatter plots of all_companies, coloured by the binary status label.
sns.pairplot(data=all_companies,hue='status_label',kind='scatter')

In [None]:
# Numeric feature columns of all_companies. The dtype is read from
# all_companies itself (not from df), and the target is excluded explicitly
# because it is used for colouring rather than plotted as a feature.
numeric_features = [
    col for col in all_companies.columns
    if col != 'status_label' and all_companies[col].dtype != object
]

# Fixed 4 x 5 grid; features beyond its 20 cells are not drawn.
n_rows = 4
n_cols = 5

fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 10))

# One scatter per feature against the year, coloured by the binary status
# label. The last scatter is kept so the colorbar has something to describe.
points = None
for ax, feature in zip(axes.ravel(), numeric_features):
    points = ax.scatter(all_companies['year'], all_companies[feature], c=all_companies['status_label'], cmap='cool', alpha=0.3)

    # Reference line separating positive from negative values.
    ax.axhline(0, c='black', ls='--')

    ax.set_xlabel('Year')
    ax.set_ylabel(feature + ' Value')
    ax.set_title(feature + ' vs. Year')

# Adjust layout to prevent overlapping elements.
plt.tight_layout()

# fig.colorbar() requires the mappable it describes as its first argument;
# calling it with only label/ax raises a TypeError. All scatters share the
# same colour data and colormap, so any one of them can serve as the mappable.
if points is not None:
    fig.colorbar(points, label='Status Label', ax=axes.ravel())

plt.show()

In [None]:
# Remove the company identifier in place; the later cells call .corr() and
# model fitting on all_companies.
all_companies.drop(columns=['company_name'],inplace=True)

In [None]:
# Average every column of all_companies per year.
df_means = all_companies.groupby('year').mean()

# Lay the features out on a grid of at most 5 columns. Ceiling division gives
# the exact number of rows, so no spare empty row appears when the feature
# count is a multiple of 5.
n_features = len(df_means.columns)
n_cols = max(1, min(5, n_features))
n_rows = max(1, -(-n_features // n_cols))

# squeeze=False keeps `axes` two-dimensional even for a single row or column.
fig, axes = plt.subplots(n_rows, n_cols, figsize=(15, 10), squeeze=False)
flat_axes = axes.ravel()

# One line chart per feature, filling the grid row by row.
for ax, feature_name in zip(flat_axes, df_means.columns):
    df_means[feature_name].plot(kind='line', ax=ax)
    ax.set_title(feature_name)

# Hide the grid cells that did not receive a feature.
for ax in flat_axes[n_features:]:
    ax.set_visible(False)

plt.tight_layout()
plt.show()

In [None]:
# Pairwise correlations between all columns of all_companies.
corr = all_companies.corr()
axis_labels = corr.columns

# A large canvas keeps the per-cell annotations legible.
plt.figure(figsize=(30, 30))
sns.heatmap(
    corr,
    annot=True,
    cmap='coolwarm',
    linewidths=.5,
    xticklabels=axis_labels,
    yticklabels=axis_labels,
)
plt.title('Correlation Matrix')
plt.show()

In [None]:
# Per-column flag: True when the column holds at least one negative value.
has_negatives = (all_companies < 0).any()

# Collect the flagged column names and report them when there are any.
negative_cols = [col for col, flagged in has_negatives.items() if flagged]
if negative_cols:
    print("Columns with negative values:", negative_cols)

Profitability Ratios:

* Net Profit Margin: (Net Income) / (Revenue) - Measures the percentage of revenue that is converted into profit.
* Return on Equity (ROE): (Net Income) / (Shareholder Equity) - Measures the return on investment for shareholders.
* Return on Assets (ROA): (Net Income) / (Total Assets) - Measures the efficiency of using assets to generate profit.
* Gross Profit Margin: (Gross Profit) / (Net Sales) - Measures the profit remaining after accounting for the cost of goods sold.
* Return on Sales (ROS): (Net Income) / (Net Sales) - Similar to Net Profit Margin, but expressed as a percentage.


Liquidity Ratios:

* Current Ratio: (Current Assets) / (Current Liabilities) - Measures a company's ability to pay off short-term liabilities with its current assets.
* Quick Ratio: (Current Assets - Inventory) / (Current Liabilities) - A more conservative measure of liquidity that excludes inventory from current assets.
* Cash Ratio: (Cash and Cash Equivalents) / (Current Liabilities) - The most stringent liquidity measure, indicating a company's ability to pay off short-term liabilities solely with cash.

Solvency Ratios:

* Debt-to-Equity Ratio: (Total Liabilities) / (Shareholder Equity) - Measures a company's financial leverage and its reliance on debt financing.
* Debt-to-Asset Ratio: (Total Liabilities) / (Total Assets) - Indicates the proportion of a company's assets financed by debt.

Activity Ratios:

* Inventory Turnover: (Cost of Goods Sold) / (Average Inventory) - Measures how efficiently a company is selling its inventory.
* Receivables Turnover: (Revenue) / (Average Accounts Receivable) - Measures how efficiently a company is collecting payments from customers.


Additional Ratios (if applicable):

* EBITDA Margin: (EBITDA) / (Revenue) - Earnings Before Interest, Taxes, Depreciation, and Amortization margin, a measure of profitability excluding non-cash expenses.
* Price-to-Earnings Ratio (P/E Ratio): (Stock Price) / (Earnings per Share) - A market valuation metric used to compare companies within the same industry.

In [None]:
# Column names available before the ratio features are added.
all_companies.columns

In [None]:
# Shorthand for the base columns that appear in several ratios.
net_income = all_companies['Net Income']
net_sales = all_companies['Net sales']
total_assets = all_companies['Total assets']
current_assets = all_companies['Current assets']
current_liabilities = all_companies['Total Current Liabilities']

# Each ratio is computed from the base columns only, so evaluating them all
# before assignment gives the same result as assigning one at a time.
# NOTE(review): a zero denominator yields inf/NaN here -- confirm the later
# scaling and model-fitting steps tolerate that.
ratio_columns = {
    'Net Profit Margin': net_income / all_companies['Total Revenue'],
    'Gross Profit Margin': all_companies['Gross Profit'] / net_sales,
    'ROA': net_income / total_assets,
    'ROS': net_income / net_sales,
    'Current Ratio': current_assets / current_liabilities,
    'Quick Ratio': (current_assets - all_companies['Inventory']) / current_liabilities,
    'Debt to asset ratio': all_companies['Total Liabilities'] / total_assets,
}

# Append the ratios as new columns, in the order listed above.
for ratio_name, ratio_values in ratio_columns.items():
    all_companies[ratio_name] = ratio_values


In [None]:
# Pairwise correlations of all_companies, now including the ratio columns.
corr = all_companies.corr()
axis_labels = corr.columns

# A large canvas keeps the per-cell annotations legible.
plt.figure(figsize=(30, 30))
sns.heatmap(
    corr,
    annot=True,
    cmap='coolwarm',
    linewidths=.5,
    xticklabels=axis_labels,
    yticklabels=axis_labels,
)
plt.title('Correlation Matrix')
plt.show()

In [None]:
# Column dtypes and non-null counts after adding the ratio columns.
all_companies.info()

In [None]:
# Every non-object column of all_companies is plotted.
numeric_features = [col for col in all_companies.columns if all_companies[col].dtype != object]

# Fixed 7 x 4 grid; features beyond its 28 cells are not drawn.
n_rows = 7
n_cols = 4

fig, axes = plt.subplots(n_rows, n_cols, figsize=(20, 20))

# One scatter per feature against the year, coloured by the binary status
# label. The last scatter is kept so the colorbar has something to describe.
points = None
for ax, feature in zip(axes.ravel(), numeric_features):
    points = ax.scatter(all_companies['year'], all_companies[feature], c=all_companies['status_label'], cmap='cool', alpha=0.3)

    # Reference line separating positive from negative values.
    ax.axhline(0, c='black', ls='--')

    ax.set_xlabel('Year')
    ax.set_ylabel(feature + ' Value')
    ax.set_title(feature + ' vs. Year')

# Adjust layout to prevent overlapping elements.
plt.tight_layout()

# fig.colorbar() requires the mappable it describes as its first argument;
# calling it with only label/ax raises a TypeError. All scatters share the
# same colour data and colormap, so any one of them can serve as the mappable.
if points is not None:
    fig.colorbar(points, label='Status Label', ax=axes.ravel())

plt.show()

In [None]:
# Final list of columns that go into model training.
all_companies.columns

# model training and results

In [None]:
# Predictors are every column except the target; both become NumPy arrays.
x = all_companies.drop(columns=["status_label"]).to_numpy()
y = all_companies["status_label"].to_numpy()

# Hold out 25% of the rows for testing, with a fixed seed.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=42
)

In [None]:
from sklearn.feature_selection import f_classif

# Predictor names, in the same column order as x.
feature_names = all_companies.columns.drop('status_label')

# f_value[0] holds the F-statistics; the bar-chart cell reads them from there.
f_value = f_classif(x, y)

# Print one (name, F-value) tuple per feature.
for name, score in zip(feature_names, f_value[0]):
    print((name, score))

In [None]:
# Bar chart of the F-value of each feature, with the feature names written
# vertically along the x-axis.
plt.figure(figsize=(4,4))
plt.bar(x=feature_names, height=f_value[0], color='tomato')
plt.xticks(rotation='vertical')
plt.ylabel('F-value')
plt.title('F-value Comparison')
plt.show()

In [None]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics on the training split only, then apply the
# same transformation to the test split.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

In [None]:
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split

# Evaluate k-nearest-neighbours on the scaled data for k = 1, 3, 5, 7, 9.
for i in range(1, 10, 2):
    neigh = KNeighborsClassifier(n_neighbors=i)
    neigh.fit(x_train_scaled, y_train)
    predict = neigh.predict(x_test_scaled)
    print("classification report of k= ", i)
    # classification_report expects (y_true, y_pred); passing them the other
    # way round swaps precision with recall and reports the wrong support.
    print(classification_report(y_test, predict))

In [None]:
from sklearn.naive_bayes import GaussianNB

# Gaussian Naive Bayes, fitted on the unscaled training split.
nb = GaussianNB()
nb.fit(x_train, y_train)

# Score the held-out split and print the per-class metrics.
y_pred = nb.predict(x_test)
cr = classification_report(y_test, y_pred)
print("\n\nClassification Report\n")
print(cr)

In [None]:
from sklearn.linear_model import SGDClassifier
from sklearn import metrics

# Linear classifier trained with SGD; class_weight='balanced' compensates for
# the class imbalance. The fixed seed makes the run reproducible, matching
# the random_state=42 used by the other models in this notebook.
linear_clf = SGDClassifier(class_weight='balanced', random_state=42)

# Train on the scaled training split.
linear_clf.fit(x_train_scaled, y_train)

# Confusion matrix on the scaled test split (true labels first).
y_test_pred = linear_clf.predict(x_test_scaled)
cm = metrics.confusion_matrix(y_test, y_test_pred)
disp = metrics.ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot()

In [None]:
# classification_report expects (y_true, y_pred); passing them the other way
# round swaps precision with recall and reports the wrong support.
print(classification_report(y_test, y_test_pred))

In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV

# Candidate settings explored by the grid search.
param_grid = {
    'max_features': ['sqrt', 'log2'],
    'criterion': ['gini', 'entropy', 'log_loss'],
}
dt = DecisionTreeClassifier(class_weight='balanced', random_state=42)

# 5-fold cross-validated search on the unscaled training split.
CV_dt = GridSearchCV(estimator=dt, param_grid=param_grid, cv=5)
CV_dt.fit(x_train, y_train)
print(CV_dt.best_params_)

# best_score_ is the mean cross-validated score of the best candidate.
accuracy = CV_dt.best_score_ * 100
print("Accuracy for our training dataset with tuning is : {:.2f}%".format(accuracy))

# Per-class metrics of the best tree on the held-out split.
best_tree_predictions = CV_dt.best_estimator_.predict(x_test)
print(classification_report(y_test, best_tree_predictions))

In [None]:
# Confusion matrix of the best decision tree on the held-out split (true labels first).
cm=metrics.confusion_matrix(y_test,CV_dt.best_estimator_.predict(x_test))
disp=metrics.ConfusionMatrixDisplay(confusion_matrix=cm)
disp.plot()

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression

# Search over the inverse regularisation strength C (1e-3 .. 1e3) and two
# solvers, with the L2 (ridge) penalty only.
grid = {
    "C": np.logspace(-3, 3, 7),
    "penalty": ["l2"],
    'solver': ['newton-cg', 'lbfgs'],
}
logreg = LogisticRegression(max_iter=10000, random_state=42)

# 5-fold cross-validated search on the scaled training split.
logreg_cv = GridSearchCV(logreg, grid, cv=5)
logreg_cv.fit(x_train_scaled, y_train)
print("tuned hpyerparameters :(best parameters) ", logreg_cv.best_params_)

# best_score_ is the mean cross-validated score of the best candidate.
logregAccuracy = logreg_cv.best_score_ * 100
print("Accuracy for our training dataset with tuning is : {:.2f}%".format(logregAccuracy))

# Per-class metrics of the best model on the scaled held-out split.
logreg_test_predictions = logreg_cv.best_estimator_.predict(x_test_scaled)
print(classification_report(y_test, logreg_test_predictions))

In [None]:
from xgboost import XGBClassifier

# Candidate numbers of boosting rounds and learning rates.
params = {
    'n_estimators': [100, 200, 500],
    'learning_rate': [0.1, 0.01, 0.05],
}
XGB_model = XGBClassifier(seed=42)

# 5-fold cross-validated search on the unscaled training split.
xgbcv = GridSearchCV(estimator=XGB_model, param_grid=params, cv=5)
xgbcv.fit(x_train, y_train)
print(xgbcv.best_params_)

# best_score_ is the mean cross-validated score of the best candidate.
accuracy = xgbcv.best_score_ * 100
print("Accuracy for our training dataset with tuning is : {:.2f}%".format(accuracy))

# Per-class metrics of the best model on the held-out split.
xgb_cv_predictions = xgbcv.best_estimator_.predict(x_test)
print(classification_report(y_test, xgb_cv_predictions))

In [None]:
from xgboost import XGBClassifier

# Fixed configuration: 1000 boosting rounds at learning rate 0.05, seeded.
XGB_model = XGBClassifier(seed=42, n_estimators=1000, learning_rate=0.05)
XGB_model.fit(x_train, y_train)

# Per-class metrics on the held-out split.
xgb_test_predictions = XGB_model.predict(x_test)
print(classification_report(y_test, xgb_test_predictions))

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Candidate forest sizes, feature-subsampling rules and split criteria.
param_grid = {
    'n_estimators': [100, 200, 500],
    'max_features': ['sqrt', 'log2'],
    'criterion': ['gini', 'entropy'],
}
rfc = RandomForestClassifier(random_state=42)

# 5-fold cross-validated search on the unscaled training split.
CV_rfc = GridSearchCV(estimator=rfc, param_grid=param_grid, cv=5)
CV_rfc.fit(x_train, y_train)
print(CV_rfc.best_params_)

# best_score_ is the mean cross-validated score of the best candidate.
accuracy = CV_rfc.best_score_ * 100
print("Accuracy for our training dataset with tuning is : {:.2f}%".format(accuracy))

# Per-class metrics of the best forest on the held-out split.
forest_test_predictions = CV_rfc.best_estimator_.predict(x_test)
print(classification_report(y_test, forest_test_predictions))

In [None]:
from sklearn.svm import SVC

# Candidate regularisation strengths, gamma settings and kernels.
param_grid = {
    'C': [0.1, 1, 10, 100, 1000],
    'gamma': ['scale', 'auto'],
    'kernel': ['rbf', 'linear'],
}
svc = SVC(class_weight='balanced', random_state=42)

# refit=True retrains the best candidate on the whole training split;
# verbose=3 prints progress for each fit.
grid = GridSearchCV(svc, param_grid, refit=True, verbose=3)

# Fit the grid search on the scaled training split.
grid.fit(x_train_scaled, y_train)

In [None]:
# Best SVC settings found by the grid search.
print(grid.best_params_)

# best_score_ is the mean cross-validated score of the best candidate.
accuracy = grid.best_score_ * 100
print("Accuracy for our training dataset with tuning is : {:.2f}%".format(accuracy))

# Per-class metrics of the best SVC on the scaled held-out split.
svc_test_predictions = grid.best_estimator_.predict(x_test_scaled)
print(classification_report(y_test, svc_test_predictions))

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import AdaBoostClassifier

# Candidate numbers of estimators and learning rates.
params = {
    'n_estimators': [100, 200, 500],
    'learning_rate': [0.1, 0.01, 0.05],
}
ada = AdaBoostClassifier(random_state=42)

# 5-fold cross-validated search on the unscaled training split.
adacv = GridSearchCV(estimator=ada, param_grid=params, cv=5)
adacv.fit(x_train, y_train)
print(adacv.best_params_)

# best_score_ is the mean cross-validated score of the best candidate.
accuracy = adacv.best_score_ * 100
print("Accuracy for our training dataset with tuning is : {:.2f}%".format(accuracy))

# Per-class metrics of the best model on the held-out split.
ada_test_predictions = adacv.best_estimator_.predict(x_test)
print(classification_report(y_test, ada_test_predictions))