CALL CENTER OPTIMIZATION

In [None]:
import os

import numpy as np
import openai
import pandas as pd
import streamlit as st

# Load the OpenAI API key from the OPENAI_API_KEY environment variable so the
# secret never has to be written into this file. Falls back to an empty string
# (the previous hard-coded value) when the variable is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")

# Cached loader for the call-center dataset
@st.cache_data
def load_data():
    """Read ``call_center_data_with_solutions.csv`` and return it as a DataFrame."""
    return pd.read_csv("call_center_data_with_solutions.csv")

# Module-level dataset; the UI code below passes it to categorize_query and
# filters it by category.
data = load_data()

# 🔹 Categorizing the query using OpenAI
def _normalise_category(value):
    """Reduce a category name to a form that ignores case, quotes and trailing dots."""
    return str(value).strip().strip("\"'`*.").strip().casefold()


def categorize_query(query, data):
    """Ask the chat model which dataset category best fits a customer query.

    Args:
        query: Free-text customer query.
        data: DataFrame expected to contain a ``Call_Category`` column.

    Returns:
        A value taken from ``data["Call_Category"]`` when the model's answer
        matches one (ignoring case, surrounding quotes and trailing periods);
        ``"Unknown Category"`` when the column is missing, holds no values, or
        the answer matches nothing; or a string starting with
        ``"Error with OpenAI API:"`` when the request fails.
    """
    if "Call_Category" not in data.columns:
        return "Unknown Category"

    unique_categories = data["Call_Category"].dropna().unique().tolist()
    if not unique_categories:
        # Nothing to choose from, so do not spend an API call on it.
        return "Unknown Category"

    prompt = f"""
    You are an AI assistant for a call center. Your task is to categorize customer queries based on predefined categories.

    **Customer Query:** "{query}"

    **Available Categories:** {unique_categories}

    Based on the query, select the most relevant category from the list.
    Only return the category name as your final answer.
    """

    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "system", "content": prompt}]
        )
        answer = response["choices"][0]["message"]["content"]
    except Exception as e:
        return f"Error with OpenAI API: {str(e)}"

    # The model frequently decorates its answer ('"Billing".', 'billing'), so
    # compare normalised forms but hand back the dataset's own value; the
    # caller filters the DataFrame with it.
    by_normalised_name = {}
    for known in unique_categories:
        by_normalised_name.setdefault(_normalise_category(known), known)

    return by_normalised_name.get(_normalise_category(answer), "Unknown Category")

# 🔹 Finding the best dataset solution using OpenAI embeddings
def get_best_dataset_solution(query, filtered_data):
    """Return the stored solution whose call transcript is closest to the query.

    Args:
        query: Free-text customer query.
        filtered_data: DataFrame with ``Call_Transcript`` and ``Solution`` columns.

    Returns:
        The ``Solution`` value of the best-matching row; ``None`` when there is
        no row with a transcript to compare against; or an error string when a
        required column is missing or the embedding request fails.
    """
    if "Call_Transcript" not in filtered_data.columns or "Solution" not in filtered_data.columns:
        return "Error: Missing necessary columns in dataset."

    # Drop transcript-less rows from the frame itself, not only from the list
    # sent to the API: the winning embedding is looked up by position, so the
    # frame and the list must stay aligned row for row.
    candidates = filtered_data.dropna(subset=["Call_Transcript"])
    if candidates.empty:
        return None  # Nothing to compare against

    call_transcripts = candidates["Call_Transcript"].astype(str).tolist()

    # Get OpenAI embeddings for query and transcripts
    try:
        query_embedding = openai.Embedding.create(input=query, model="text-embedding-ada-002")["data"][0]["embedding"]
        transcript_embeddings = openai.Embedding.create(input=call_transcripts, model="text-embedding-ada-002")["data"]

        # Dot product as the similarity score; this equals cosine similarity
        # only when the embedding vectors have unit length.
        similarities = [np.dot(query_embedding, emb["embedding"]) for emb in transcript_embeddings]

        best_match_idx = int(np.argmax(similarities))
        return candidates.iloc[best_match_idx]["Solution"]

    except Exception as e:
        return f"Error with OpenAI API: {str(e)}"

# 🔹 Finding the best solution using OpenAI GPT
def get_ai_generated_solution(query, filtered_data):
    """Ask the chat model for a solution, giving it past calls as examples.

    Args:
        query: Free-text customer query.
        filtered_data: DataFrame with ``Call_Transcript`` and ``Solution`` columns.

    Returns:
        The model's answer, or an error string when a required column is
        missing or the request fails.
    """
    if "Call_Transcript" not in filtered_data.columns or "Solution" not in filtered_data.columns:
        return "Error: Missing necessary columns in dataset."

    # Take the examples from complete rows only, so that transcript i and
    # solution i always come from the same call. Limit to 5 for cost.
    examples = filtered_data.dropna(subset=["Call_Transcript", "Solution"]).head(5)
    # f-strings also cope with non-string cells, which str.join would reject.
    call_transcripts = "\n".join(
        f"{i}. {text}" for i, text in enumerate(examples["Call_Transcript"], start=1)
    )
    solutions = "\n".join(
        f"{i}. {text}" for i, text in enumerate(examples["Solution"], start=1)
    )

    prompt = f"""
    You are an AI call center assistant. A customer has asked the following question:

    **User Query:** "{query}"

    Below are previous call transcripts and their corresponding solutions (matching numbers belong together):

    **Call Transcripts:**
    {call_transcripts}

    **Solutions:**
    {solutions}

    Please find the best matching solution for the query. If no exact match exists, generate a helpful response.
    """

    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "system", "content": prompt}]
        )
        return response["choices"][0]["message"]["content"].strip()

    except Exception as e:
        return f"Error with OpenAI API: {str(e)}"

# 🔹 Streamlit UI
def _is_error_message(value):
    """Tell whether a helper returned one of its error strings instead of a result."""
    return isinstance(value, str) and value.startswith(
        ("Error with OpenAI API:", "Error: Missing necessary columns")
    )


st.title("AI-Powered Call Center Optimization")

user_query = st.text_area("Enter your query:")

if st.button("Find Solution"):
    if user_query.strip():
        # Step 1: Categorize the query
        category = categorize_query(user_query, data)
        if _is_error_message(category):
            # A failed API call must not be used as a category filter: it
            # would match no rows and starve the next steps of data.
            st.error(category)
            category = "Unknown Category"
        st.write(f"**Identified Call Category:** {category}")

        # Step 2: Filter dataset by category (whole dataset when unknown)
        filtered_data = data[data["Call_Category"] == category] if category != "Unknown Category" else data

        # Step 3: Find best solution from dataset
        dataset_solution = get_best_dataset_solution(user_query, filtered_data)

        # Step 4: Get AI-generated response
        ai_response = get_ai_generated_solution(user_query, filtered_data)

        # Step 5: Display Results (errors are shown as errors, not as answers)
        st.write("### Recommended Solution:")
        if _is_error_message(dataset_solution):
            st.error(dataset_solution)
        elif dataset_solution:
            st.write(dataset_solution)
        else:
            st.write("No exact match found in the dataset.")

        st.write("### Detailed Solution:")
        if _is_error_message(ai_response):
            st.error(ai_response)
        else:
            st.write(ai_response)

    else:
        st.warning("Please enter a query.")
How this program works: when a user enters a query, OpenAI is first used to determine the best-matching category in the dataset.
OpenAI embeddings are then used to find the closest matching call transcript within that category by cosine similarity.
As the final result, the app returns the best dataset solution based on that similarity and also generates a solution using GPT.

NETWORK INTRUSION DETECTION

In [None]:
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, Flatten, LSTM
import tensorflow as tf

# Load dataset
@st.cache_data
def load_data():
    """Download the NSL-KDD training split and return features plus label.

    Each record of ``KDDTrain+.txt`` carries 43 comma-separated fields: the 41
    traffic features, the attack label, and a trailing difficulty score. All 43
    are named here on purpose: when ``names`` is one entry short, pandas turns
    the first field into the index and shifts every remaining value one column
    to the left, so ``label`` would receive the difficulty score instead of
    the attack name.

    Returns:
        DataFrame with the 41 feature columns and ``label``. The difficulty
        score is dropped because it is not a feature of the connection.
    """
    url = "https://raw.githubusercontent.com/defcom17/NSL_KDD/master/KDDTrain+.txt"
    columns = ["duration", "protocol_type", "service", "flag", "src_bytes", "dst_bytes", "land",
               "wrong_fragment", "urgent", "hot", "num_failed_logins", "logged_in", "num_compromised",
               "root_shell", "su_attempted", "num_root", "num_file_creations", "num_shells",
               "num_access_files", "num_outbound_cmds", "is_host_login", "is_guest_login", "count",
               "srv_count", "serror_rate", "srv_serror_rate", "rerror_rate", "srv_rerror_rate",
               "same_srv_rate", "diff_srv_rate", "srv_diff_host_rate", "dst_host_count",
               "dst_host_srv_count", "dst_host_same_srv_rate", "dst_host_diff_srv_rate",
               "dst_host_same_src_port_rate", "dst_host_srv_diff_host_rate", "dst_host_serror_rate",
               "dst_host_srv_serror_rate", "dst_host_rerror_rate", "dst_host_srv_rerror_rate", "label",
               "difficulty"]
    # index_col=False guarantees no field is ever promoted to the index.
    data = pd.read_csv(url, names=columns, header=None, index_col=False)
    return data.drop(columns=["difficulty"])

def convert_label(x):
    """Turn a textual label into a binary target.

    Strings that start with "normal" (ignoring case and surrounding
    whitespace) become 0, every other string becomes 1. Non-string values are
    returned untouched.
    """
    if not isinstance(x, str):
        return x
    is_normal = x.strip().lower().startswith('normal')
    return 0 if is_normal else 1

# Preprocess data
def preprocess_data(data):
    """Encode, clean and scale the raw frame into model-ready arrays.

    Args:
        data: DataFrame containing the ``protocol_type``, ``service`` and
            ``flag`` columns plus a ``label`` column. It is not modified.

    Returns:
        ``(X, y)``: the standardised feature matrix and the label array
        produced by ``convert_label``.
    """
    # Work on a copy so the caller's DataFrame is left exactly as it was.
    data = data.copy()

    categorical_cols = ['protocol_type', 'service', 'flag']
    for col in categorical_cols:
        data[col] = LabelEncoder().fit_transform(data[col])

    data['label'] = data['label'].apply(convert_label)
    # Anything still non-numeric becomes NaN; columns that end up entirely
    # NaN carry no information and are removed.
    data = data.apply(pd.to_numeric, errors='coerce')
    data = data.dropna(axis=1, how='all')

    X = data.drop(columns=['label'])
    y = data['label'].values

    # NOTE(review): imputer and scaler are fitted on the full dataset; the
    # caller splits into train/test afterwards, so test rows influence the
    # fitted statistics.
    X = SimpleImputer(strategy='most_frequent').fit_transform(X)
    X = StandardScaler().fit_transform(X)

    return X, y

# Build ANN model
def build_ann(input_shape):
    """Return a compiled dense network (64 -> 32 -> 1 sigmoid) for ``input_shape`` features."""
    model = Sequential()
    model.add(Dense(64, activation='relu', input_shape=(input_shape,)))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Build CNN model
def build_cnn(input_shape):
    """Return a compiled Conv1D -> Flatten -> sigmoid network for inputs of ``input_shape``."""
    model = Sequential()
    model.add(Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=input_shape))
    model.add(Flatten())
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Build RNN model (LSTM)
def build_rnn(input_shape):
    """Return a compiled two-layer LSTM network with a sigmoid output for inputs of ``input_shape``."""
    model = Sequential()
    # The first LSTM returns the full sequence so the second one can consume it.
    model.add(LSTM(50, return_sequences=True, input_shape=input_shape))
    model.add(LSTM(50))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

# Streamlit UI
st.title("🔐 Intrusion Detection System using ML & Deep Learning")
st.sidebar.header("Model Selection")

# Load, preprocess and make one stratified 80/20 split
with st.spinner("Loading data and preprocessing..."):
    data = load_data()
    X, y = preprocess_data(data)
    sss = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
    # n_splits=1, so the generator yields exactly one (train, test) index pair.
    train_index, test_index = next(sss.split(X, y))
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

# Model choices
MODEL_OPTIONS = [
    "Random Forest", "SVM", "Naïve Bayes", "KNN", "Logistic Regression", "ANN", "CNN", "RNN"
]
model_choice = st.sidebar.selectbox("Choose a model", MODEL_OPTIONS)

# Classical estimators are stored as factories so that only the selected one
# is actually constructed.
CLASSICAL_MODELS = {
    "Random Forest": lambda: RandomForestClassifier(n_estimators=100, random_state=42),
    "SVM": lambda: SVC(kernel='linear'),
    "Naïve Bayes": lambda: GaussianNB(),
    "KNN": lambda: KNeighborsClassifier(n_neighbors=5),
    "Logistic Regression": lambda: LogisticRegression(max_iter=1000),
}

# Networks that expect input shaped (features, 1) instead of a flat vector.
SEQUENCE_MODELS = {"CNN": build_cnn, "RNN": build_rnn}

if model_choice in CLASSICAL_MODELS:
    model = CLASSICAL_MODELS[model_choice]()
    with st.spinner(f"Training {model_choice}..."):
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)

    acc = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)

    st.subheader(f"{model_choice} Results")
    st.write(f"**Accuracy:** {acc:.4f}")
    st.text("Classification Report:")
    st.text(classification_report(y_test, y_pred))

    fig, ax = plt.subplots()
    sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", ax=ax)
    # confusion_matrix puts true labels on rows and predictions on columns.
    ax.set_xlabel("Predicted label")
    ax.set_ylabel("True label")
    st.pyplot(fig)
    # Streamlit reruns the script on every interaction; without closing,
    # pyplot keeps every figure ever created alive.
    plt.close(fig)

elif model_choice == "ANN":
    with st.spinner("Training ANN..."):
        ann = build_ann(X_train.shape[1])
        ann.fit(X_train, y_train, epochs=5, batch_size=32, verbose=0)
        acc = ann.evaluate(X_test, y_test, verbose=0)[1]
    st.subheader("ANN Results")
    st.write(f"**Accuracy:** {acc:.4f}")

elif model_choice in SEQUENCE_MODELS:
    # Add a trailing channel axis: (samples, features) -> (samples, features, 1)
    X_train_seq = X_train.reshape(-1, X_train.shape[1], 1)
    X_test_seq = X_test.reshape(-1, X_test.shape[1], 1)
    with st.spinner(f"Training {model_choice}..."):
        network = SEQUENCE_MODELS[model_choice]((X_train.shape[1], 1))
        network.fit(X_train_seq, y_train, epochs=5, batch_size=32, verbose=0)
        acc = network.evaluate(X_test_seq, y_test, verbose=0)[1]
    st.subheader(f"{model_choice} Results")
    st.write(f"**Accuracy:** {acc:.4f}")


FRAUD DETECTION

In [None]:
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Load dataset
def load_data(file_path=r"G:\\vs\\Telecome\\fraud_detection\\CDR-Call-Details.csv"):
    """Read the call-detail-record CSV into a DataFrame.

    Args:
        file_path: Location of the CSV file. The default is the absolute path
            this script was originally written against; pass a different path
            to run it on another machine.

    Returns:
        The file's contents as a DataFrame.
    """
    return pd.read_csv(file_path)

# Module-level dataset used by preprocessing and by the dashboard below.
df = load_data()

# Data Preprocessing
def preprocess_data(df):
    """Pick the five usage features and the ``isFraud`` target, then split 80/20.

    Returns the four-element result of ``train_test_split``
    (X_train, X_test, y_train, y_test), using a fixed random seed.
    """
    feature_columns = ['Day Calls', 'Day Mins', 'Intl Calls', 'Intl Mins', 'CustServ Calls']
    predictors = df[feature_columns]
    target = df['isFraud'].astype(int)  # cast the flag to integers
    split = train_test_split(predictors, target, test_size=0.2, random_state=42)
    return split

# Train/test partitions shared by training and evaluation below.
X_train, X_test, y_train, y_test = preprocess_data(df)

# Train Model
def train_model(X_train, y_train):
    """Fit and return a 100-tree random forest with a fixed seed."""
    forest = RandomForestClassifier(n_estimators=100, random_state=42)
    # fit() returns the estimator itself, so it can be returned directly.
    return forest.fit(X_train, y_train)

# Fitted classifier; reused for evaluation and for the interactive prediction tool.
model = train_model(X_train, y_train)

# Predict & Evaluate
def evaluate_model(model, X_test, y_test):
    """Predict on the test set and return ``(accuracy, classification_report_text)``."""
    predictions = model.predict(X_test)
    return (
        accuracy_score(y_test, predictions),
        classification_report(y_test, predictions),
    )

# Test-set metrics displayed in the "Model Performance" section below.
accuracy, report = evaluate_model(model, X_test, y_test)

# Streamlit App
st.title("Fraud Detection in SIM Card Usage")
st.write("Detecting fraudulent SIM cloning and unusual call behaviors using machine learning.")

# Display Dataset
if st.checkbox("Show Raw Data"):
    st.dataframe(df.head())

# Display Fraud Statistics
st.subheader("Fraud Statistics")
st.write(f"Fraudulent Cases: {df['isFraud'].sum()} out of {len(df)} records")

# Accuracy & Report
st.subheader("Model Performance")
st.write(f"**Accuracy:** {accuracy:.2f}")
st.text("Classification Report:")
st.text(report)

# Fraud Visualization
st.subheader("Fraud Distribution")
fig, ax = plt.subplots()
sns.countplot(x=df['isFraud'], palette='coolwarm', ax=ax)
st.pyplot(fig)
# The script reruns on every widget interaction; close the figure so pyplot
# does not accumulate one more open figure per rerun.
plt.close(fig)

# User Input for Prediction
st.subheader("Fraud Prediction Tool")
day_calls = st.number_input("Day Calls", min_value=0, value=30)
day_mins = st.number_input("Day Minutes", min_value=0.0, value=180.0)
intl_calls = st.number_input("International Calls", min_value=0, value=2)
intl_mins = st.number_input("International Minutes", min_value=0.0, value=10.0)
cust_serv_calls = st.number_input("Customer Service Calls", min_value=0, value=1)

if st.button("Predict Fraud"):
    # The model was fitted on a DataFrame, so give it the same column names
    # at prediction time; values are listed in the training column order.
    input_data = pd.DataFrame(
        [[day_calls, day_mins, intl_calls, intl_mins, cust_serv_calls]],
        columns=X_train.columns,
    )
    prediction = model.predict(input_data)[0]
    st.write("**Fraud Detected! 🚨**" if prediction == 1 else "**No Fraud Detected. ✅**")


TELECOM OPTIMIZATION

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import streamlit as st
from sklearn.cluster import KMeans
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.preprocessing import StandardScaler

# Read the tower metrics
file_path = r"G:\\vs\\Telecome\\telecom_network_optimization.csv"
df = pd.read_csv(file_path)

# Step 1: Data Preprocessing - standardise the six input metrics
features = [
    'Active_Users',
    'Traffic_Load_MBps',
    'Call_Drop_Rate',
    'Latency_ms',
    'Previous_Failures',
    'Tower_Health_Score',
]
scaler = StandardScaler().fit(df[features])
df_scaled = scaler.transform(df[features])

# Step 2: K-Means Clustering for Congestion Detection (3 clusters, fixed seed)
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10).fit(df_scaled)
df['Congestion_Cluster'] = kmeans.labels_

# Step 3: Load Balancing with Reinforcement Learning
class LoadBalancer:
    """Tabular Q-learning agent keyed by hashable states.

    States that have never been updated are treated as having a Q-value of
    zero for every action, so ``get_action`` returns action 0 for them.

    Args:
        alpha: Learning rate applied to each update.
        gamma: Discount factor applied to the best next-state value.
        n_actions: Number of available actions (defaults to the original 2).
    """

    def __init__(self, alpha=0.1, gamma=0.9, n_actions=2):
        self.q_table = {}  # state -> array of per-action Q-values
        self.alpha = alpha
        self.gamma = gamma
        self.n_actions = n_actions

    def _q_values(self, state):
        """Return the stored Q-values for ``state`` or zeros; never inserts."""
        return self.q_table.get(state, np.zeros(self.n_actions))

    def get_action(self, state):
        """Return the index of the highest-valued action for ``state``."""
        return np.argmax(self._q_values(state))

    def update_q_value(self, state, action, reward, next_state):
        """Apply one Q-learning update for ``(state, action)``.

        Only ``state`` is written to the table; ``next_state`` is read but
        not inserted.
        """
        old_value = self._q_values(state)[action]
        next_max = np.max(self._q_values(next_state))
        new_value = old_value + self.alpha * (reward + self.gamma * next_max - old_value)
        self.q_table.setdefault(state, np.zeros(self.n_actions))[action] = new_value

# NOTE(review): the balancer's Q-table is still empty at this point, so
# get_action returns 0 for every load and Balanced_Load equals the raw load.
load_balancer = LoadBalancer()


def _balanced_load(load):
    """Scale the load to 80% when the policy selects action 1, else keep it."""
    if load_balancer.get_action(load) == 1:
        return load * 0.8
    return load


df['Balanced_Load'] = df['Traffic_Load_MBps'].apply(_balanced_load)

# Step 4: Failure Risk Prediction - Machine Learning Model (Random Forest)
# Features and target, split 80/20 with a fixed seed
X = df[features]
y = df['Failure_Risk']
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

# Fit the forest (fit returns the estimator) and score it on the held-out rows
rf_model = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)
rf_report = classification_report(y_test, y_pred_rf)
rf_accuracy = accuracy_score(y_test, y_pred_rf)

# Step 5: Failure Risk Prediction - Deep Learning Model
# Standardise the inputs for the network; the scaler is fitted on the
# training rows only so no test-set statistics reach the model.
nn_scaler = StandardScaler()
X_train_nn = nn_scaler.fit_transform(X_train)
X_test_nn = nn_scaler.transform(X_test)
y_train_nn = y_train.to_numpy()
y_test_nn = y_test.to_numpy()

nn_model = Sequential([
    Dense(32, activation='relu', input_shape=(X_train_nn.shape[1],)),
    Dropout(0.2),
    Dense(16, activation='relu'),
    Dense(1, activation='sigmoid')
])
nn_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
# verbose=0: the script reruns on every dashboard interaction, so per-epoch
# progress output would flood the server log.
nn_model.fit(X_train_nn, y_train_nn, epochs=50, batch_size=10,
             validation_data=(X_test_nn, y_test_nn), verbose=0)
# evaluate returns [loss, accuracy] for the metrics configured in compile().
nn_loss, nn_accuracy = nn_model.evaluate(X_test_nn, y_test_nn, verbose=0)

# Step 6: Predictive Maintenance Strategy
# Flag towers whose health score falls in the lowest 20% of the dataset.
failure_threshold = df['Tower_Health_Score'].quantile(0.2)
df['Maintenance_Flag'] = df['Tower_Health_Score'] < failure_threshold

# Streamlit Dashboard
st.title("Telecom Network Optimization Dashboard")

st.subheader("Traffic Load Distribution")
st.bar_chart(df["Traffic_Load_MBps"])

st.subheader("Latency vs. Traffic Load")
st.scatter_chart(df, x="Traffic_Load_MBps", y="Latency_ms", color="Congestion_Cluster")

st.subheader("Tower Health Score Distribution")
st.bar_chart(df["Tower_Health_Score"])

st.subheader("Failure Risk Prediction - Random Forest")
st.text(f"Random Forest Accuracy: {rf_accuracy:.2f}")
st.text("Classification Report:")
st.text(rf_report)

st.subheader("Failure Risk Prediction - Neural Network")
st.text(f"Neural Network Accuracy: {nn_accuracy:.2f}")

st.subheader("Predictive Maintenance")
st.dataframe(df[['Tower_Health_Score', 'Maintenance_Flag']])


TELECOM SEGMENTATION

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import streamlit as st
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from io import BytesIO

# Streamlit App Title
st.title("📊 Telecom Customer Segmentation")

# Sidebar for User Inputs
st.sidebar.header("🔧 Settings")
uploaded_file = st.sidebar.file_uploader("📂 Upload Telecom Customer Data (CSV)", type=["csv"])

if uploaded_file is not None:
    # Load dataset
    df = pd.read_csv(uploaded_file)

    # Required columns are kept in a list: iterating a set would give the
    # features a different order from one run to the next, which changes the
    # selectbox defaults and the column order of the summary table.
    required_columns = ["Monthly Spend ($)", "Data Usage (GB)", "Voice Minutes Used", "SMS Sent"]
    missing_columns = [col for col in required_columns if col not in df.columns]

    if missing_columns:
        st.error(f"⚠️ The dataset must contain the following columns: {required_columns}")
    else:
        # Drop rows with missing values
        df = df.dropna()
        features = required_columns

        # User input for number of clusters
        n_clusters = st.sidebar.slider("🔢 Select Number of Clusters", min_value=2, max_value=10, value=3, step=1)

        if len(df) < n_clusters:
            # KMeans cannot form more clusters than there are rows.
            st.error(
                f"⚠️ Only {len(df)} complete rows are available; "
                f"at least {n_clusters} are needed for {n_clusters} clusters."
            )
        else:
            # Standardize the data so every feature weighs equally in the distance
            X_scaled = StandardScaler().fit_transform(df[features])

            # Apply K-Means Clustering
            kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
            df["Cluster"] = kmeans.fit_predict(X_scaled)

            # Define dynamic bundle recommendations
            bundle_mapping = {i: f"Plan {i+1}" for i in range(n_clusters)}
            df["Recommended Bundle"] = df["Cluster"].map(bundle_mapping)

            # Display the segmented dataset
            st.write("### 📋 Segmented Customer Data")
            st.dataframe(df.head(10))

            # Cluster Insights: per-cluster feature means
            st.write("### 📊 Cluster Insights")
            cluster_summary = df.groupby("Cluster")[features].mean().round(2)
            st.dataframe(cluster_summary)

            # Visualization of Clusters
            st.write("### 📊 Customer Segmentation Visualization")

            # Select feature pair for visualization
            feature_x = st.selectbox("Select X-axis Feature", features, index=0)
            feature_y = st.selectbox("Select Y-axis Feature", features, index=1)

            fig, ax = plt.subplots(figsize=(10, 6))
            sns.scatterplot(x=df[feature_x], y=df[feature_y], hue=df["Cluster"], palette="viridis", ax=ax)
            ax.set_xlabel(feature_x)
            ax.set_ylabel(feature_y)
            ax.set_title(f"Customer Segmentation: {feature_x} vs {feature_y}")
            st.pyplot(fig)
            # The script reruns on every widget change; close the figure so
            # open figures do not pile up.
            plt.close(fig)

            # Download segmented data
            output = BytesIO()
            df.to_csv(output, index=False)
            output.seek(0)

            st.download_button(
                label="📥 Download Segmented Data",
                data=output,
                file_name="telecom_customer_segmentation.csv",
                mime="text/csv"
            )

