In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Read the transactions CSV into a DataFrame.
# NOTE(review): absolute Colab path — the notebook only runs where this file exists.
csv_path = '/content/creditcard.csv'
data = pd.read_csv(csv_path)

In [None]:
# Peek at the first five rows to check which columns were parsed.
first_rows = data.head(n=5)
print(first_rows)

   Time        V1        V2        V3        V4        V5        V6        V7  \
0   0.0 -1.359807 -0.072781  2.536347  1.378155 -0.338321  0.462388  0.239599   
1   0.0  1.191857  0.266151  0.166480  0.448154  0.060018 -0.082361 -0.078803   
2   1.0 -1.358354 -1.340163  1.773209  0.379780 -0.503198  1.800499  0.791461   
3   1.0 -0.966272 -0.185226  1.792993 -0.863291 -0.010309  1.247203  0.237609   
4   2.0 -1.158233  0.877737  1.548718  0.403034 -0.407193  0.095921  0.592941   

         V8        V9  ...       V21       V22       V23       V24       V25  \
0  0.098698  0.363787  ... -0.018307  0.277838 -0.110474  0.066928  0.128539   
1  0.085102 -0.255425  ... -0.225775 -0.638672  0.101288 -0.339846  0.167170   
2  0.247676 -1.514654  ...  0.247998  0.771679  0.909412 -0.689281 -0.327642   
3  0.377436 -1.387024  ... -0.108300  0.005274 -0.190321 -1.175575  0.647376   
4 -0.270533  0.817739  ... -0.009431  0.798278 -0.137458  0.141267 -0.206010   

        V26       V27       V28 

In [None]:
# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() only appends a stray "None" line to the output.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 247578 entries, 0 to 247577
Data columns (total 31 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Time    247578 non-null  float64
 1   V1      247578 non-null  float64
 2   V2      247578 non-null  float64
 3   V3      247578 non-null  float64
 4   V4      247578 non-null  float64
 5   V5      247578 non-null  float64
 6   V6      247578 non-null  float64
 7   V7      247578 non-null  float64
 8   V8      247578 non-null  float64
 9   V9      247578 non-null  float64
 10  V10     247578 non-null  float64
 11  V11     247578 non-null  float64
 12  V12     247578 non-null  float64
 13  V13     247578 non-null  float64
 14  V14     247578 non-null  float64
 15  V15     247578 non-null  float64
 16  V16     247578 non-null  float64
 17  V17     247578 non-null  float64
 18  V18     247578 non-null  float64
 19  V19     247578 non-null  float64
 20  V20     247578 non-null  float64
 21  V21     24

In [None]:
# Count rows per label. dropna=False keeps a separate bucket for rows whose
# `Class` is missing, so unlabeled rows are not silently left out of the tally
# (by default value_counts() drops NaN and the totals no longer add up to len(data)).
print(data['Class'].value_counts(dropna=False))

Class
0.0    247128
1.0       449
Name: count, dtype: int64


In [None]:
# Separate the label column (`Class`) from the remaining feature columns.
y = data['Class']
X = data.drop(columns='Class')

In [None]:
# Standardise every feature column of the full dataset.
# The fit and the transform are spelled out as two steps; together they are
# what fit_transform() does in one call.
scaler = StandardScaler()
scaler.fit(X)
X_scaled = scaler.transform(X)

In [None]:
# Under-sample the majority class so both labels are equally represented.
# Rows with Class == 1 are kept in full; an equal number of Class == 0 rows
# is drawn at random.
fraud = data[data['Class'] == 1]

# A fixed seed makes the draw repeatable: without it every kernel restart
# selects different legitimate rows, so the trained models and the reported
# metrics change from run to run. 42 matches the seed used for the
# train/test split.
legit = data[data['Class'] == 0].sample(n=len(fraud), random_state=42)

# Stack the two equally sized groups and split features from the label again.
balanced_data = pd.concat([fraud, legit])
X_balanced = balanced_data.drop('Class', axis=1)
y_balanced = balanced_data['Class']

In [None]:
# Re-fit the existing `scaler` on the under-sampled features and standardise them.
# This replaces the statistics `scaler` learned from the full feature matrix `X`.
X_balanced_scaled = scaler.fit(X_balanced).transform(X_balanced)


In [None]:
# Split the *unscaled* balanced features first, then learn the scaling
# statistics from the training rows only. Fitting the scaler on every balanced
# row before splitting lets the held-out rows influence the mean/variance used
# to transform the training data (preprocessing leakage), which biases the
# test metrics. The split arguments (30% test, seed 42) are unchanged.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_balanced, y_balanced, test_size=0.3, random_state=42
)

# Rebind `scaler` to one fitted on the training split so that the scaler
# pickled later carries exactly the statistics the models were trained with.
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix

# Fit a logistic-regression classifier (library defaults) on the training split.
logistic_model = LogisticRegression().fit(X_train, y_train)

# Label the held-out rows.
y_pred_logistic = logistic_model.predict(X_test)

# Show the per-class precision/recall/F1 table followed by the confusion matrix.
print("Logistic Regression Performance:")
logistic_report = classification_report(y_test, y_pred_logistic)
print(logistic_report)
logistic_confusion = confusion_matrix(y_test, y_pred_logistic)
print(logistic_confusion)


Logistic Regression Performance:
              precision    recall  f1-score   support

         0.0       0.93      0.96      0.94       132
         1.0       0.96      0.93      0.94       138

    accuracy                           0.94       270
   macro avg       0.94      0.94      0.94       270
weighted avg       0.95      0.94      0.94       270

[[127   5]
 [ 10 128]]


In [None]:
from sklearn.tree import DecisionTreeClassifier

# Train Decision Tree model.
# random_state is fixed because the tree permutes candidate features at each
# split; without a seed, repeated runs can grow different trees and print
# different metrics. 42 matches the seed used for the train/test split.
decision_tree_model = DecisionTreeClassifier(random_state=42)
decision_tree_model.fit(X_train, y_train)

# Predict on the test set
y_pred_tree = decision_tree_model.predict(X_test)

# Evaluate Decision Tree model: per-class report, then the confusion matrix
print("Decision Tree Performance:")
print(classification_report(y_test, y_pred_tree))
print(confusion_matrix(y_test, y_pred_tree))


Decision Tree Performance:
              precision    recall  f1-score   support

         0.0       0.93      0.86      0.90       132
         1.0       0.88      0.94      0.91       138

    accuracy                           0.90       270
   macro avg       0.91      0.90      0.90       270
weighted avg       0.91      0.90      0.90       270

[[114  18]
 [  8 130]]


In [None]:
from sklearn.ensemble import RandomForestClassifier

# Train Random Forest model.
# random_state is fixed because the forest bootstraps rows and sub-samples
# features for every tree; without a seed each run builds a different
# ensemble and the metrics below are not reproducible. 42 matches the seed
# used for the train/test split.
random_forest_model = RandomForestClassifier(random_state=42)
random_forest_model.fit(X_train, y_train)

# Predict on the test set
y_pred_forest = random_forest_model.predict(X_test)

# Evaluate Random Forest model: per-class report, then the confusion matrix
print("Random Forest Performance:")
print(classification_report(y_test, y_pred_forest))
print(confusion_matrix(y_test, y_pred_forest))


Random Forest Performance:
              precision    recall  f1-score   support

         0.0       0.93      0.95      0.94       132
         1.0       0.95      0.93      0.94       138

    accuracy                           0.94       270
   macro avg       0.94      0.94      0.94       270
weighted avg       0.94      0.94      0.94       270

[[125   7]
 [ 10 128]]


In [None]:
pip install flask




In [None]:
import pickle
# Save the models and the scaler (used for data preprocessing).
# Each file is opened in a `with` block so the handle is flushed and closed
# as soon as the dump finishes; the bare open(...) calls previously used here
# left the handles open until garbage collection.
artifacts = {
    'logistic_model.pkl': logistic_model,
    'decision_tree_model.pkl': decision_tree_model,
    'random_forest_model.pkl': random_forest_model,
    'scaler.pkl': scaler,
}
for filename, fitted_object in artifacts.items():
    with open(filename, 'wb') as out_file:
        pickle.dump(fitted_object, out_file)

print("Models and scaler saved as pickle files.")


Models and scaler saved as pickle files.


In [None]:
# Load models and scaler back from disk, rebinding the in-memory names.
# `with` closes each file handle immediately after reading; the bare
# open(...) calls previously used here never closed them.
# SECURITY: pickle.load can execute arbitrary code embedded in the file —
# only load pickles produced by a trusted source (here, the files written by
# this notebook).
with open('logistic_model.pkl', 'rb') as in_file:
    logistic_model = pickle.load(in_file)
with open('decision_tree_model.pkl', 'rb') as in_file:
    decision_tree_model = pickle.load(in_file)
with open('random_forest_model.pkl', 'rb') as in_file:
    random_forest_model = pickle.load(in_file)

# Load scaler
with open('scaler.pkl', 'rb') as in_file:
    scaler = pickle.load(in_file)

print("Models and scaler loaded successfully.")


Models and scaler loaded successfully.
