In [2]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.36.0-py2.py3-none-any.whl (8.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.6/8.6 MB[0m [31m21.9 MB/s[0m eta [36m0:00:00[0m
Collecting gitpython!=3.1.19,<4,>=3.0.7 (from streamlit)
  Downloading GitPython-3.1.43-py3-none-any.whl (207 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m207.3/207.3 kB[0m [31m20.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pydeck<1,>=0.8.0b4 (from streamlit)
  Downloading pydeck-0.9.1-py2.py3-none-any.whl (6.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.9/6.9 MB[0m [31m50.2 MB/s[0m eta [36m0:00:00[0m
Collecting watchdog<5,>=2.1.5 (from streamlit)
  Downloading watchdog-4.0.1-py3-none-manylinux2014_x86_64.whl (83 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m83.0/83.0 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
Collecting gitdb<5,>=4.0.1 (from gitpython!=3.1.19,<4,>=3.0.7->streamlit)
  Downloading gitdb-4

In [11]:
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from imblearn.over_sampling import SMOTE
from sklearn.metrics import make_scorer, matthews_corrcoef
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Streamlit UI
#
# Flow: upload a CSV -> preprocess -> train the chosen classifier -> collect a
# single transaction through a form -> predict whether it is fraudulent.
#
# NOTE: load_data, preprocess_data, train_evaluate_model and predict_fraud are
# not defined in this cell; they must already exist in the namespace.
st.title("Fraud Detection System")

# st.file_uploader returns an uploaded-file object (None until the user picks
# a file), not a filesystem path, despite the variable name.
file_path = st.file_uploader("Upload your dataset", type=["csv"])

if file_path:
    df = load_data(file_path)
    X_res, y_res, X_test, y_test, feature_names = preprocess_data(df)

    classifier = st.selectbox("Choose Classifier",
                              ["SGDClassifier", "RandomForestClassifier", "LogisticRegression", "K-NearestNeighbors"])

    if st.button("Train Model"):
        st.write(f"<span style='color:yellow'>Training model: {classifier}</span>", unsafe_allow_html=True)
        model = train_evaluate_model(classifier, X_res, y_res, X_test, y_test)
        # Persist everything the prediction step needs: Streamlit re-runs the
        # whole script on each interaction, so plain locals would be lost.
        st.session_state['model'] = model
        st.session_state['X_train'] = X_res
        st.session_state['y_train'] = y_res
        # numeric_only=True: df still contains the categorical columns used by
        # the select boxes below; averaging string columns raises TypeError on
        # pandas >= 2.0, which would abort training before the model is stored.
        st.session_state['mean_values'] = df.mean(numeric_only=True).to_dict()
        st.session_state['feature_names'] = feature_names
        st.success(f"{classifier} trained successfully!")

    if 'model' in st.session_state:
        model = st.session_state['model']
        mean_values = st.session_state['mean_values']
        feature_names = st.session_state['feature_names']

        # Columns entered as numbers, and columns chosen from the values
        # present in the uploaded data.
        user_features = ["amount", "oldbalanceOrg", "newbalanceOrig", "oldbalanceDest", "newbalanceDest", "unusuallogin"]
        categorical_features = ['type', 'branch', 'Acct type', 'Time of day']

        # Ask user for transaction input
        st.write("<span style='color:orange'>Enter transaction details for prediction (other features will be set to their mean values):</span>", unsafe_allow_html=True)
        with st.form("input_form"):
            input_data = {}
            for col in user_features:
                # Default each numeric field to the column mean (0 if the
                # column has no numeric mean).
                input_data[col] = st.number_input(f"Enter {col}", value=float(mean_values.get(col, 0)))

            input_data['type'] = st.selectbox("Select Type", df['type'].unique())
            input_data['branch'] = st.selectbox("Select Branch", df['branch'].unique())
            input_data['Acct type'] = st.selectbox("Select Account Type", df['Acct type'].unique())
            input_data['Time of day'] = st.selectbox("Select Time of Day", df['Time of day'].unique())

            # Handle one-hot encoding for categorical inputs: one 0/1 entry
            # per observed category, keyed "<column>_<value>", replacing the
            # raw selection.
            # NOTE(review): presumably this naming matches the encoded columns
            # produced by preprocess_data -- confirm against that function.
            for col in categorical_features:
                for unique_value in df[col].unique():
                    feature_col = f"{col}_{unique_value}"
                    input_data[feature_col] = 1 if input_data[col] == unique_value else 0
                del input_data[col]

            submitted = st.form_submit_button("Predict")

        if submitted:
            # Ensure input data includes all features: anything the form did
            # not supply falls back to the column mean (or 0).
            for feature in feature_names:
                if feature not in input_data:
                    input_data[feature] = mean_values.get(feature, 0)

            prediction = predict_fraud(model, input_data, feature_names)
            # A prediction of 0 is reported as not fraudulent; any other value
            # is reported as fraudulent.
            if prediction == 0:
                st.success("The transaction is not fraudulent.")
            else:
                st.error("The transaction is fraudulent.")
