<a href="https://colab.research.google.com/github/Aditya-21052131/Intelligent-Payment-Assistant/blob/main/IntelligentPaymentAssistant.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install pandas scikit-learn plotly kafka-python


Collecting kafka-python
  Downloading kafka_python-2.0.2-py2.py3-none-any.whl.metadata (7.8 kB)
Downloading kafka_python-2.0.2-py2.py3-none-any.whl (246 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m246.5/246.5 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: kafka-python
Successfully installed kafka-python-2.0.2


In [2]:
import random
import pandas as pd
from datetime import datetime

payment_methods = ['UPI', 'Credit Card', 'Net Banking', 'Wallet']
statuses = ['success', 'failure', 'pending']
gateways = ['Gateway_A', 'Gateway_B', 'Gateway_C']

# Function to simulate payment transaction data
def generate_payment_data():
    return {
        "transaction_id": str(random.randint(10000, 99999)),
        "user_id": f"user_{random.randint(1, 100)}",
        "amount": round(random.uniform(100, 10000), 2),
        "status": random.choice(statuses),
        "payment_method": random.choice(payment_methods),
        "timestamp": datetime.now().strftime("%Y-%m-%dT%H:%M:%S"),
        "response_time": round(random.uniform(0.1, 3.0), 2),
        "gateway": random.choice(gateways)
    }

# Simulate generating 100 payment transactions
transactions = [generate_payment_data() for _ in range(100)]

# Convert to a Pandas DataFrame for easy manipulation
transactions_df = pd.DataFrame(transactions)
transactions_df.head()


Unnamed: 0,transaction_id,user_id,amount,status,payment_method,timestamp,response_time,gateway
0,93214,user_12,8654.43,failure,Wallet,2024-09-15T14:50:24,2.98,Gateway_A
1,43243,user_99,7164.69,failure,UPI,2024-09-15T14:50:24,1.05,Gateway_B
2,37157,user_99,7816.82,pending,UPI,2024-09-15T14:50:24,2.47,Gateway_B
3,19571,user_20,7666.71,failure,Net Banking,2024-09-15T14:50:24,1.14,Gateway_C
4,76741,user_54,5550.34,pending,Credit Card,2024-09-15T14:50:24,2.09,Gateway_A


In [4]:
# Calculate summary statistics
print("Average Response Time:", transactions_df['response_time'].mean())

# Calculate success rate
success_rate = transactions_df[transactions_df['status'] == 'success'].shape[0] / transactions_df.shape[0]
print("Success Rate:", success_rate)

# Group by payment gateway and calculate metrics
gateway_summary = transactions_df.groupby('gateway').agg({
    'response_time': ['mean'],
    'amount': ['mean'],
    'transaction_id': ['count'],
    'status': lambda x: (x == 'success').mean()  # Success rate
})
gateway_summary


Average Response Time: 1.6444000000000005
Success Rate: 0.33


Unnamed: 0_level_0,response_time,amount,transaction_id,status
Unnamed: 0_level_1,mean,mean,count,<lambda>
gateway,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
Gateway_A,1.740909,5104.793636,33,0.333333
Gateway_B,1.536765,5487.821471,34,0.264706
Gateway_C,1.658788,4777.188788,33,0.393939


In [5]:
from sklearn.ensemble import IsolationForest

# Train an Isolation Forest for anomaly detection
features = transactions_df[['response_time', 'amount']]
model = IsolationForest(contamination=0.1)
model.fit(features)

# Detect anomalies (1: normal, -1: anomaly)
transactions_df['anomaly'] = model.predict(features)
anomalies = transactions_df[transactions_df['anomaly'] == -1]
print("Anomalies Detected:")
anomalies


Anomalies Detected:


Unnamed: 0,transaction_id,user_id,amount,status,payment_method,timestamp,response_time,gateway,anomaly
0,93214,user_12,8654.43,failure,Wallet,2024-09-15T14:50:24,2.98,Gateway_A,-1
16,17104,user_76,585.24,pending,Net Banking,2024-09-15T14:50:24,0.41,Gateway_A,-1
18,51901,user_57,9884.69,failure,Credit Card,2024-09-15T14:50:24,2.02,Gateway_C,-1
19,70972,user_67,9372.37,pending,Wallet,2024-09-15T14:50:24,1.34,Gateway_B,-1
40,53531,user_74,8045.04,success,Credit Card,2024-09-15T14:50:24,0.29,Gateway_A,-1
48,32634,user_36,6071.5,success,UPI,2024-09-15T14:50:24,0.13,Gateway_B,-1
53,40401,user_94,9388.59,success,UPI,2024-09-15T14:50:24,0.65,Gateway_C,-1
63,91661,user_62,3321.9,success,UPI,2024-09-15T14:50:24,3.0,Gateway_B,-1
90,89738,user_33,8714.93,success,Wallet,2024-09-15T14:50:24,0.26,Gateway_A,-1
91,13614,user_87,289.33,pending,Wallet,2024-09-15T14:50:24,2.83,Gateway_A,-1


In [6]:
import plotly.express as px

# Visualize response time of transactions
fig = px.scatter(transactions_df, x='timestamp', y='response_time', color='status',
                 title='Transaction Response Time', labels={'response_time': 'Response Time (seconds)'})
fig.show()


In [7]:
fig = px.histogram(transactions_df, x='amount', nbins=20, title='Distribution of Transaction Amounts')
fig.show()


In [8]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

# Create features (response time and success/failure) and target (gateway)
transactions_df['success'] = transactions_df['status'].apply(lambda x: 1 if x == 'success' else 0)
X = transactions_df[['response_time', 'success']]
y = transactions_df['gateway']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Train the decision tree classifier
classifier = DecisionTreeClassifier()
classifier.fit(X_train, y_train)

# Make predictions (e.g., which gateway to choose for a transaction with a response time of 1.5 seconds and success)
predicted_gateway = classifier.predict([[1.5, 1]])  # Response time 1.5 seconds, successful transaction
print(f"Recommended Gateway: {predicted_gateway}")


Recommended Gateway: ['Gateway_A']



X does not have valid feature names, but DecisionTreeClassifier was fitted with feature names



In [9]:
# Save DataFrame to CSV
transactions_df.to_csv('transactions.csv', index=False)

# Save model using joblib
import joblib
joblib.dump(model, 'isolation_forest_model.pkl')


['isolation_forest_model.pkl']