<a href="https://colab.research.google.com/github/Shivachikkonda02/-Bank-Marketing-Analysis-and-Classification-ML-Project/blob/main/AnomaData.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# Importing necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV

# Step 1: Loading the dataset
# NOTE(review): this is an absolute Colab path; the CSV must already have been
# uploaded to /content for the read to succeed.
data = pd.read_csv("/content/AnomaData.csv")

# Step 2: Exploratory Data Analysis (EDA)
# Check data quality.
# DataFrame.info() writes its report straight to stdout and returns None, so it
# is called bare: wrapping it in print() appended a stray "None" line.
data.info()
print(data.describe())

# Treat missing values if any, then (Step 3) correct the date datatype.
# The frame is rebuilt and rebound in a single chain instead of being mutated
# with inplace=True:
#   * dropna() removes every row that contains at least one missing value;
#   * assign() parses the 'time' column into datetime64 on a fresh copy, so no
#     chained-assignment warning can be triggered.
data = (
    data
    .dropna()
    .assign(time=lambda frame: pd.to_datetime(frame['time']))
)

# Step 4: Feature Engineering and Selection
# Assuming no specific feature engineering required

# Convert datetime to numerical representation (whole seconds since the Unix
# epoch). This is done once, on the full feature frame, BEFORE splitting:
#   * assigning into the frames returned by train_test_split can raise
#     SettingWithCopyWarning, and the conversion had to be written twice;
#   * `.astype(int) // 10**9` relies on the platform's default integer width
#     and on the datetime resolution being nanoseconds. Subtracting the epoch
#     and floor-dividing by one second is independent of both.
# utc=True treats naive timestamps as UTC wall-clock time, which yields the same
# epoch seconds as the previous integer cast.
unix_epoch = pd.Timestamp("1970-01-01", tz="UTC")
time_utc = pd.to_datetime(data['time'], utc=True)

# Step 5: Train/Test Split
# 'y' is the target.
# NOTE(review): 'y.1' is excluded from the features as well; it looks like a
# second label column, please confirm against the data dictionary.
X = (
    data
    .drop(columns=['y', 'y.1'])
    .assign(time=(time_utc - unix_epoch) // pd.Timedelta(seconds=1))
)
y = data['y']

# Stratify so that train and test keep the same class proportions (important
# when one class is rare). Stratification needs at least two rows per class, so
# fall back to a plain random split when that does not hold.
stratify_labels = y if y.value_counts().min() >= 2 else None
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=stratify_labels,
)

# Step 6: Model Selection
# A fixed random_state makes the forest (bootstrap samples and feature
# sub-sampling) reproducible, so re-running the notebook gives the same scores.
model = RandomForestClassifier(random_state=42)

# Step 7: Model Training
model.fit(X_train, y_train)

# Step 8: Model Evaluation
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
# NOTE(review): if the labels are imbalanced, accuracy alone can look high even
# when the minority class is never detected; read the per-class rows below.
print("Accuracy:", accuracy)
# zero_division=0 states explicitly that precision/F1 is reported as 0.0 for a
# class that was never predicted, instead of emitting an
# UndefinedMetricWarning for each average. A 0.00 row therefore means
# "the model never predicted this class".
print(classification_report(y_test, y_pred, zero_division=0))

# Step 9: Hyperparameter Tuning/Model Improvement
# 3 * 3 * 3 * 3 = 81 parameter combinations, each fitted on 3 folds (243 fits)
# plus one final refit on the whole training set.
param_grid = {'n_estimators': [100, 200, 300],
              'max_depth': [None, 10, 20],
              'min_samples_split': [2, 5, 10],
              'min_samples_leaf': [1, 2, 4]}
grid_search = GridSearchCV(
    # A dedicated, seeded estimator keeps the search reproducible; GridSearchCV
    # clones it for every fit, so no fitted state is shared between candidates.
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    cv=3,
    # The default score for a classifier is plain accuracy, which a model can
    # maximise by always predicting the majority class. Balanced accuracy
    # (mean per-class recall) rewards finding every class.
    scoring='balanced_accuracy',
    # The candidate fits are independent, so spread them over all CPU cores.
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)
best_params = grid_search.best_params_
print("Best parameters:", best_params)
print("Best cross-validated balanced accuracy:", grid_search.best_score_)

# With the default refit=True the best configuration has been retrained on the
# full training set; score it on the held-out data so the tuning result can be
# compared with the untuned model.
y_pred_tuned = grid_search.best_estimator_.predict(X_test)
print(classification_report(y_test, y_pred_tuned, zero_division=0))

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1605 entries, 0 to 1604
Data columns (total 62 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   time    1605 non-null   object 
 1   y       1605 non-null   int64  
 2   x1      1605 non-null   float64
 3   x2      1605 non-null   float64
 4   x3      1605 non-null   float64
 5   x4      1605 non-null   float64
 6   x5      1605 non-null   float64
 7   x6      1605 non-null   float64
 8   x7      1605 non-null   float64
 9   x8      1605 non-null   float64
 10  x9      1605 non-null   float64
 11  x10     1605 non-null   float64
 12  x11     1605 non-null   float64
 13  x12     1605 non-null   float64
 14  x13     1605 non-null   float64
 15  x14     1605 non-null   float64
 16  x15     1605 non-null   float64
 17  x16     1605 non-null   float64
 18  x17     1604 non-null   float64
 19  x18     1604 non-null   float64
 20  x19     1604 non-null   float64
 21  x20     1604 non-null   float64
 22  

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


Best parameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 100}


# New Section