In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load the datasets
train_data=pd.read_csv('C:/Users/sathi/Downloads/creditcard/fraud dataset/fraudTrain.csv',engine='python',on_bad_lines='skip')
test_data=pd.read_csv('C:/Users/sathi/Downloads/creditcard/fraud dataset/fraudTest.csv',engine='python',on_bad_lines='skip')

# Drop irrelevant/sensitive columns
drop_cols = [
    'Unnamed: 0', 
    'cc_num', 'first', 'last', 'street', 'trans_date_trans_time',
    'dob', 'trans_num'
]

train_data.drop(columns=drop_cols,inplace=True,errors='ignore')
test_data.drop(columns=drop_cols,inplace=True,errors='ignore')

# Drop rows with missing values
train_data.dropna(inplace=True)
test_data.dropna(inplace=True)

# Separate features and target
X_train=train_data.drop(columns=['is_fraud'])
y_train=train_data['is_fraud']
X_test=test_data.drop(columns=['is_fraud'])
y_test=test_data['is_fraud']

# Convert categorical features to numeric using one-hot encoding
combined=pd.concat([X_train, X_test],axis=0)
combined_encoded=pd.get_dummies(combined,drop_first=True)

# Split back to train/test after encoding
X_train_encoded=combined_encoded[:len(X_train)]
X_test_encoded=combined_encoded[len(X_train):]

#  Save feature names BEFORE scaling (important!)
feature_names=X_train_encoded.columns.tolist()

# Feature Scaling
scaler=StandardScaler()
X_train_scaled=scaler.fit_transform(X_train_encoded)
X_test_scaled=scaler.transform(X_test_encoded)

# Train Random Forest model with class_weight='balanced'
rf_model=RandomForestClassifier(class_weight='balanced',n_estimators=100,random_state=42)
rf_model.fit(X_train_scaled, y_train)

# Make predictions
y_pred=rf_model.predict(X_test_scaled)

# Evaluate
print("Accuracy:",accuracy_score(y_test, y_pred))
print("Classification Report:\n",classification_report(y_test, y_pred))



Accuracy: 0.9953846153846154
Classification Report:
               precision    recall  f1-score   support

         0.0       1.00      1.00      1.00      3882
         1.0       0.00      0.00      0.00        18

    accuracy                           1.00      3900
   macro avg       0.50      0.50      0.50      3900
weighted avg       0.99      1.00      0.99      3900

Model saved as fraud_detection_model.pkl
Scaler saved as scaler.pkl


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Feature names saved as feature_names.pkl


In [3]:
test_data

Unnamed: 0,merchant,category,amt,gender,city,state,zip,lat,long,city_pop,job,unix_time,merch_lat,merch_long,is_fraud
0,fraud_Kirlin and Sons,personal_care,2.86,M,Columbia,SC,29209,33.9659,-80.9355,333497.0,Mechanical engineer,1.371817e+09,33.986391,-81.200714,0.0
1,fraud_Sporer-Keebler,personal_care,29.84,F,Altonah,UT,84002,40.3207,-110.436,302.0,"Sales professional, IT",1.371817e+09,39.450498,-109.960431,0.0
2,"fraud_Swaniawski, Nitzsche and Welch",health_fitness,41.28,F,Bellmore,NY,11710,40.6729,-73.5365,34496.0,"Librarian, public",1.371817e+09,40.495810,-74.196111,0.0
3,fraud_Haley Group,misc_pos,60.05,M,Titusville,FL,32780,28.5697,-80.8191,54767.0,Set designer,1.371817e+09,28.812398,-80.883061,0.0
4,fraud_Johnston-Casper,travel,3.19,M,Falmouth,MI,49632,44.2529,-85.01700000000001,1126.0,Furniture designer,1.371817e+09,44.959148,-85.884734,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3895,"fraud_Witting, Beer and Ernser",home,70.13,M,Riverton,WY,82501,43.0351,-108.2024,19408.0,"Lecturer, higher education",1.371908e+09,42.143839,-107.251447,0.0
3896,"fraud_Koss, Hansen and Lueilwitz",home,76.72,F,De Soto,KS,66018,38.9462,-94.9714,5760.0,Chief Executive Officer,1.371908e+09,38.267088,-95.607297,0.0
3897,fraud_Leannon-Nikolaus,travel,477.66,M,Smock,PA,15480,39.9961,-79.7678,1946.0,Charity fundraiser,1.371908e+09,39.201413,-79.401124,0.0
3898,"fraud_Koss, Hansen and Lueilwitz",home,217.07,M,Edmond,OK,73034,35.6665,-97.4798,116001.0,Media buyer,1.371908e+09,35.763099,-98.407189,0.0


In [5]:
train_data


Unnamed: 0,merchant,category,amt,gender,city,state,zip,lat,long,city_pop,job,unix_time,merch_lat,merch_long,is_fraud
0,"fraud_Rippin, Kub and Mann",misc_net,4.97,F,Moravian Falls,NC,28654,36.0788,-81.1781,3495,"Psychologist, counselling",1325376018,36.011293,-82.048315,0
1,"fraud_Heller, Gutmann and Zieme",grocery_pos,107.23,F,Orient,WA,99160,48.8878,-118.2105,149,Special educational needs teacher,1325376044,49.159047,-118.186462,0
2,fraud_Lind-Buckridge,entertainment,220.11,M,Malad City,ID,83252,42.1808,-112.2620,4154,Nature conservation officer,1325376051,43.150704,-112.154481,0
3,"fraud_Kutch, Hermiston and Farrell",gas_transport,45.00,M,Boulder,MT,59632,46.2306,-112.1138,1939,Patent attorney,1325376076,47.034331,-112.561071,0
4,fraud_Keeling-Crist,misc_pos,41.96,M,Doe Hill,VA,24433,38.4207,-79.4629,99,Dance movement psychotherapist,1325376186,38.674999,-78.632459,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3905,fraud_Bednar Group,misc_net,5.68,M,De Queen,AR,71832,34.0442,-94.3386,11253,"Scientist, biomedical",1325582884,34.491602,-94.956069,0
3906,"fraud_Lesch, D'Amore and Brown",food_dining,91.44,M,De Queen,AR,71832,34.0442,-94.3386,11253,"Scientist, biomedical",1325582888,34.560693,-94.157388,0
3907,"fraud_Langworth, Boehm and Gulgowski",shopping_net,108.83,F,Sixes,OR,97476,42.8250,-124.4409,217,Retail merchandiser,1325582978,42.325433,-125.336778,0
3908,"fraud_Okuneva, Schneider and Rau",shopping_pos,154.83,M,Oaks,PA,19456,40.1334,-75.4536,737,Oceanographer,1325582993,40.751155,-76.298888,0
