In [12]:
pip install streamlit

Note: you may need to restart the kernel to use updated packages.


In [13]:
pip install matplotlib 

Note: you may need to restart the kernel to use updated packages.


In [10]:
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
import streamlit as st
import time 
import matplotlib.pyplot as plt

In [29]:
def load_data(claims_path='cashback_claims.csv',
              analysis_path='processed_cashback_analysis.csv'):
    """Read the two cashback CSV files into DataFrames.

    Parameters
    ----------
    claims_path : str or path-like, default 'cashback_claims.csv'
        Location of the raw cashback claims CSV.
    analysis_path : str or path-like, default 'processed_cashback_analysis.csv'
        Location of the processed cashback analysis CSV.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(cashback_claims, processed_analysis)`` in that order.

    Notes
    -----
    Calling ``load_data()`` with no arguments reads the same two files from
    the current working directory as before; the parameters only exist so the
    notebook can be pointed at another data directory without editing code.
    """
    cashback_claims = pd.read_csv(claims_path)
    processed_analysis = pd.read_csv(analysis_path)
    return cashback_claims, processed_analysis

cashback_claims,processed_analysis = load_data()

In [30]:
print(cashback_claims,processed_analysis)


   User ID  Cashback Amount  Claim Date  Number of Claims  \
0       U1        33.731404  2023-01-01                 3   
1       U2        41.747040  2023-01-02                 3   
2       U3        60.856437  2023-01-03                 1   
3       U4        42.599363  2023-01-04                 3   
4       U5        74.327885  2023-01-05                 4   
..     ...              ...         ...               ...   
95     U96        48.713566  2023-04-06                 1   
96     U97        37.699267  2023-04-07                 4   
97     U98        54.686877  2023-04-08                 4   
98     U99        26.260181  2023-04-09                 1   
99    U100        77.808967  2023-04-10                 3   

    Total Cashback Claimed  
0               326.706291  
1               224.553225  
2               325.434074  
3               143.518860  
4               234.559158  
..                     ...  
95              120.634194  
96              359.201820  
97    

In [27]:
def preprocess_data(claims, analysis):
    """Stack the claims frame on top of the analysis frame.

    The two frames are concatenated row-wise and the index is renumbered from
    zero. Columns that exist in only one of the inputs are kept, and the rows
    coming from the other input hold NaN in those columns.

    Parameters
    ----------
    claims : pandas.DataFrame
        Frame whose rows come first in the result.
    analysis : pandas.DataFrame
        Frame whose rows are appended after ``claims``.

    Returns
    -------
    pandas.DataFrame
        A new frame with ``len(claims) + len(analysis)`` rows.
    """
    frames_in_order = [claims, analysis]
    # Row-wise stacking is pd.concat's default axis; only the index reset is explicit.
    stacked = pd.concat(frames_in_order, ignore_index=True)
    return stacked
data = preprocess_data(cashback_claims, processed_analysis)
data

Unnamed: 0,User ID,Cashback Amount,Claim Date,Number of Claims,Total Cashback Claimed,anomaly_score,is_anomaly,potential_fraud
0,U1,33.731404,2023-01-01,3,326.706291,,,
1,U2,41.747040,2023-01-02,3,224.553225,,,
2,U3,60.856437,2023-01-03,1,325.434074,,,
3,U4,42.599363,2023-01-04,3,143.518860,,,
4,U5,74.327885,2023-01-05,4,234.559158,,,
...,...,...,...,...,...,...,...,...
195,U95,,,1,86.878360,1.0,False,False
196,U96,,,1,48.713566,1.0,False,False
197,U97,,,1,37.699267,1.0,False,False
198,U98,,,1,54.686877,1.0,False,False


In [28]:
pip install seaborn


Note: you may need to restart the kernel to use updated packages.


In [22]:
import seaborn as sns

In [36]:
def anomaly_detection(data, contamination=0.05, random_state=42):
    """Label each row of ``data`` as 'Normal' or 'Fraud' with an Isolation Forest.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame to score. It is modified in place: the columns
        ``'anomaly_score'`` and ``'status'`` are created or overwritten.
    contamination : float, default 0.05
        Passed to ``IsolationForest(contamination=...)``.
    random_state : int, default 42
        Passed to ``IsolationForest(random_state=...)``.

    Returns
    -------
    pandas.DataFrame
        The same ``data`` object, with ``'anomaly_score'`` holding the value
        returned by ``fit_predict`` and ``'status'`` set to ``'Normal'`` where
        that value is 1 and ``'Fraud'`` otherwise.

    Raises
    ------
    ValueError
        If no float64/int64 feature column is left to fit on.
    """
    model = IsolationForest(contamination=contamination, random_state=random_state)

    # The columns this function writes must not be fed back in as features.
    # 'anomaly_score' is numeric, so without this exclusion a pre-existing or
    # previously computed score leaks into the model and re-running the cell
    # on the same frame gives a different fit each time.
    output_columns = ['anomaly_score', 'status']
    numeric_features = (
        data.select_dtypes(include=['float64', 'int64'])
        .drop(columns=['User ID'] + output_columns, errors='ignore')
    )
    if numeric_features.shape[1] == 0:
        raise ValueError("anomaly_detection needs at least one float64/int64 feature column")

    data['anomaly_score'] = model.fit_predict(numeric_features)
    data['status'] = data['anomaly_score'].apply(lambda x: 'Normal' if x == 1 else 'Fraud')
    return data

anomaly_detection(data)



Unnamed: 0,User ID,Cashback Amount,Claim Date,Number of Claims,Total Cashback Claimed,anomaly_score,is_anomaly,potential_fraud,status
0,U1,33.731404,2023-01-01,3,326.706291,1,,,Normal
1,U2,41.747040,2023-01-02,3,224.553225,1,,,Normal
2,U3,60.856437,2023-01-03,1,325.434074,1,,,Normal
3,U4,42.599363,2023-01-04,3,143.518860,1,,,Normal
4,U5,74.327885,2023-01-05,4,234.559158,1,,,Normal
...,...,...,...,...,...,...,...,...,...
195,U95,,,1,86.878360,1,False,False,Normal
196,U96,,,1,48.713566,1,False,False,Normal
197,U97,,,1,37.699267,1,False,False,Normal
198,U98,,,1,54.686877,1,False,False,Normal


In [41]:
%%writefile visual.py

import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

def plot_visualizations(data):  # Accept 'data' as a parameter
    """Render the cashback dashboard charts into the current Streamlit page.

    Draws a bar chart and a pie chart of the ``'status'`` value counts and,
    when at least two float64/int64 columns exist, a scatter plot of the first
    two of them coloured by status. Otherwise a Streamlit warning is shown in
    place of the scatter plot.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``'status'`` column. The first two float64/int64
        columns (in column order) are used for the scatter plot.

    Returns
    -------
    None
    """
    st.title("Cashback Claims Analysis Dashboard")

    # Status Count Bar Chart
    st.subheader("Bar Chart: Normal vs Fraudulent Claims")
    status_counts = data['status'].value_counts()
    fig, ax = plt.subplots()
    sns.barplot(x=status_counts.index, y=status_counts.values, palette='coolwarm', ax=ax)
    ax.set_title("Number of Normal vs Fraudulent Claims")
    ax.set_xlabel("Status")
    ax.set_ylabel("Count")
    st.pyplot(fig)
    # Release the figure once it has been handed to Streamlit; otherwise every
    # call leaves another open figure registered with pyplot.
    plt.close(fig)

    # Pie Chart of Status
    st.subheader("Pie Chart: Proportion of Fraudulent and Normal Claims")
    # Colours are looked up by label: value_counts() orders by frequency, so a
    # positional colour list would paint 'Fraud' green whenever it is the
    # larger group.
    status_colors = {'Normal': '#4CAF50', 'Fraud': '#FF5733'}
    pie_colors = [status_colors.get(label, '#9E9E9E') for label in status_counts.index]
    fig2, ax2 = plt.subplots()
    ax2.pie(status_counts, labels=status_counts.index, autopct='%1.1f%%', colors=pie_colors)
    ax2.set_title("Proportion of Normal vs Fraudulent Claims")
    st.pyplot(fig2)
    plt.close(fig2)

    # Scatter Plot for Anomaly Detection
    st.subheader("Scatter Plot: Anomaly Detection")
    numeric_features = data.select_dtypes(include=['float64', 'int64'])
    if len(numeric_features.columns) > 1:
        # Only create the figure when it will actually be drawn.
        fig3, ax3 = plt.subplots()
        scatter_x = numeric_features.columns[0]
        scatter_y = numeric_features.columns[1]
        sns.scatterplot(x=numeric_features[scatter_x], y=numeric_features[scatter_y], hue=data['status'], palette='coolwarm', ax=ax3)
        ax3.set_title("Scatter Plot of Anomaly Detection")
        ax3.set_xlabel(scatter_x)
        ax3.set_ylabel(scatter_y)
        st.pyplot(fig3)
        plt.close(fig3)
    else:
        st.warning("Not enough numeric features for a scatter plot.")

# Main Execution
if __name__ == "__main__":

    def load_data():
        """Return a five-row hard-coded demo frame with a status column and two numeric features."""
        demo_columns = {
            'status': ['Normal', 'Fraud', 'Normal', 'Fraud', 'Normal'],
            'feature1': [1, 5, 3, 8, 2],
            'feature2': [10, 20, 15, 25, 12],
        }
        return pd.DataFrame(demo_columns)

    # Build the demo frame, draw the charts from it, then show the frame itself.
    data = load_data()
    plot_visualizations(data)
    st.subheader("Processed Data Preview")
    st.dataframe(data)


Writing visual.py
