<a href="https://colab.research.google.com/github/Muthon1/DataScience/blob/main/Creating_Your_Own_Chatbot_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
# Installing the required packages (IPython shell magics; run once per runtime):
#   streamlit             - imported by app.py to build the web UI
#   pyngrok               - imported by the tunnel cell to expose the local server
#   streamlit-option-menu - NOTE(review): not imported by any cell visible here;
#                           confirm it is still needed before keeping it
!pip install streamlit
!pip install pyngrok
!pip install streamlit-option-menu



In [8]:
%%writefile app.py
# Importing necessary libraries
import pandas as pd
import numpy as np
import streamlit as st
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier

# Default location of the crime CSV inside the notebook runtime
DATA_PATH = '/content/Crime_Data_from_2020_to_Present.csv'

# Column names the dataset is expected to carry, in file order
EXPECTED_COLUMNS = [
    "DR_NO", "Date Rptd", "DATE OCC", "TIME OCC", "AREA", "AREA NAME", "Rpt Dist No", "Part 1-2",
    "Crm Cd", "Crm Cd Desc", "Mocodes", "Vict Age", "Vict Sex", "Vict Descent", "Premis Cd", "Premis Desc",
    "Weapon Used Cd", "Weapon Desc", "Status", "Status Desc", "Crm Cd 1", "Crm Cd 2", "Crm Cd 3", "Crm Cd 4",
    "LOCATION", "Cross Street", "LAT", "LON",
]

# Inputs of the model: exactly the fields the prediction form collects,
# named after the dataset columns they are derived from
FEATURE_COLUMNS = ['Vict Age', 'Vict Sex', 'AREA', 'Weapon Used Cd']


# Function to load and preprocess the dataset
def load_data(path=DATA_PATH):
    """Read the crime CSV and clean it without destroying text columns.

    Args:
        path: Location of the CSV file. Defaults to ``DATA_PATH``.

    Returns:
        A DataFrame in which '?' placeholders are NaN, columns whose values
        are all numeric are stored as numbers with gaps filled by the column
        median, and every other column (descriptions, names, dates) keeps
        its original text.
    """
    data = pd.read_csv(path)

    # Assign column names if the dataset matches the expected number of columns
    if data.shape[1] == len(EXPECTED_COLUMNS):
        data.columns = EXPECTED_COLUMNS
    else:
        st.warning("The number of columns in the dataset does not match the expected number. Please adjust the column names.")

    # Replace '?' with NaN
    data = data.replace('?', np.nan)

    # Convert a column to numeric only when the conversion is lossless.
    # Coercing every column would turn all text columns into NaN, which
    # breaks anything that needs the descriptions (e.g. the most common crime).
    for column in data.columns:
        converted = pd.to_numeric(data[column], errors='coerce')
        if converted.notna().sum() == data[column].notna().sum():
            data[column] = converted

    # Fill missing values with the median for numeric columns; an entirely
    # empty column has no median and is left untouched
    for column in data.select_dtypes(include='number').columns:
        median = data[column].median()
        if pd.notna(median):
            data[column] = data[column].fillna(median)

    return data


def _encode_features(data):
    """Build the numeric feature frame (``FEATURE_COLUMNS``) used for training."""
    features = pd.DataFrame(index=data.index)

    age = pd.to_numeric(data['Vict Age'], errors='coerce')
    features['Vict Age'] = age.fillna(age.median())

    # Binary encoding: 1 for 'M', 0 for every other or missing value
    features['Vict Sex'] = (data['Vict Sex'] == 'M').astype(int)

    features['AREA'] = pd.to_numeric(data['AREA'], errors='coerce')

    # The form only asks whether a weapon was used, so train on the same
    # yes/no signal. 'Weapon Used Cd' itself cannot be used for this because
    # load_data fills its gaps with the median.
    # NOTE(review): assumes 'Weapon Desc' is blank when no weapon was
    # involved - confirm against the dataset's data dictionary.
    features['Weapon Used Cd'] = data['Weapon Desc'].notna().astype(int)

    # Guard against leftover NaN (e.g. a column with no usable values)
    return features[FEATURE_COLUMNS].fillna(0)


def _encode_input(age, sex, area_code, weapon_used):
    """Encode one form submission the same way ``_encode_features`` encodes rows."""
    row = {
        'Vict Age': float(age),
        'Vict Sex': int(sex == 'M'),
        'AREA': float(area_code),
        'Weapon Used Cd': int(weapon_used == 'Yes'),
    }
    return pd.DataFrame([row], columns=FEATURE_COLUMNS)


def _resolve_area_code(area, data):
    """Translate the free-text area input into a numeric area code.

    Accepts either a number or, when the dataset has an 'AREA NAME' column,
    an area name (case-insensitive). Returns None when nothing matches.
    """
    text = str(area).strip()
    if not text:
        return None

    code = pd.to_numeric(text, errors='coerce')
    if pd.notna(code):
        return float(code)

    if 'AREA NAME' in data.columns:
        areas = data[['AREA', 'AREA NAME']].drop_duplicates()
        names = areas['AREA NAME'].astype(str).str.strip().str.casefold()
        matches = areas.loc[names == text.casefold(), 'AREA']
        if not matches.empty and pd.notna(matches.iloc[0]):
            return float(matches.iloc[0])

    return None


# Function to train the Random Forest model
def train_model(data):
    """Train a Random Forest that predicts whether a record is a Part 1 crime.

    Args:
        data: DataFrame as returned by ``load_data``.

    Returns:
        ``(model, scaler)``: the fitted RandomForestClassifier and the
        StandardScaler that must be applied to inputs before prediction.
    """
    X = _encode_features(data)

    # Target: 1 when 'Part 1-2' denotes Part 1, else 0. Pulling the digits out
    # first handles both numeric values (1 / 2) and labels such as 'Part 1';
    # comparing a numeric column with the string 'Part 1' is never true.
    part_number = pd.to_numeric(
        data['Part 1-2'].astype(str).str.extract(r'(\d+)', expand=False),
        errors='coerce',
    )
    y = (part_number == 1).astype(int)

    # Split the data; the held-out 20% is not used here but keeps the
    # training subset reproducible (random_state=42)
    X_train, _X_test, y_train, _y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Scaling the feature variables
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)

    # Training a Random Forest classifier
    model = RandomForestClassifier(n_estimators=100, random_state=42)
    model.fit(X_train_scaled, y_train)

    return model, scaler  # Return the trained model and the scaler for later use


# Main function to define the Streamlit app layout
def main():
    """Render the app: a dataset preview, a small Q&A tab and a prediction tab."""
    # Set the app title and introductory text
    st.title("Crime Data Prediction")
    st.write("Your interactive tool for crime data analysis")

    # Load the data and train the model once; Streamlit reruns the script on
    # every interaction, so the result is cached
    @st.cache_resource
    def load_trained_model():
        data = load_data()
        model, scaler = train_model(data)
        return model, scaler, data

    # Load the model, scaler, and data
    model, scaler, data = load_trained_model()

    # Check the shape and first few rows
    st.write(f"Dataset Shape: {data.shape}")
    st.write("First few rows of the dataset:")
    st.write(data.head())

    # Create two tabs: Crime Information and Crime Prediction
    tab1, tab2 = st.tabs(["Crime Information", "Crime Prediction"])

    with tab1:
        st.subheader("Ask me about crime data!")
        user_query = st.selectbox("Choose your question:", ["What is a crime?", "What are the types of crimes?", "What is the most common crime?"])
        if st.button("Get Answer"):
            if user_query == "What is a crime?":
                st.write("A crime is an illegal act that harms individuals or society.")
            elif user_query == "What are the types of crimes?":
                st.write("Crimes are classified into violent and non-violent types.")
            else:
                # mode() is empty when the column has no values at all
                common_crimes = data['Crm Cd Desc'].mode()
                if common_crimes.empty:
                    st.warning("No crime descriptions are available in the dataset.")
                else:
                    most_common_crime = common_crimes.iloc[0]
                    st.write(f"The most common crime in the dataset is: {most_common_crime}")

    with tab2:
        st.subheader("Crime Prediction")
        st.write("Enter the details to predict the type of crime:")

        # Input fields for crime data details (based on the dataset)
        area = st.text_input("Area")
        age = st.number_input("Victim Age", 0, 100)
        sex = st.selectbox("Victim Sex", ["M", "F"])
        weapon_used = st.selectbox("Weapon Used", ["Yes", "No"])

        # Predict on button click
        if st.button("Predict Crime Type"):
            area_code = _resolve_area_code(area, data)
            if area_code is None:
                st.error("Please enter a valid area code or area name.")
            else:
                # Same columns and encoding the scaler and model were fitted on
                input_data = _encode_input(age, sex, area_code, weapon_used)

                # Scale the input data
                scaled_data = scaler.transform(input_data)

                # Get prediction (binary: Part 1 crime vs. non-Part 1)
                # NOTE(review): the labels below equate "Part 1" with
                # "violent"; confirm that matches the dataset's definition.
                prediction = model.predict(scaled_data)
                st.write("The model predicts: " + ("Violent Crime" if prediction[0] == 1 else "Non-Violent Crime"))


if __name__ == "__main__":
    main()


Overwriting app.py


In [9]:
from pyngrok import ngrok

# Install Ngrok authtoken
!ngrok authtoken 2s1Pw5f1wJsL15HDSYJ8nZsrC6c_2A3sQwbgC6HVFUZFnFgNq

# Run Streamlit app
!streamlit run app.py &>/dev/null&

# Create an Ngrok tunnel for port 8501 (Streamlit default port)
public_url = ngrok.connect(8501, "http")

# Print the public URL to access the app
print(public_url)


Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml
NgrokTunnel: "https://f6cb-35-226-28-129.ngrok-free.app" -> "http://localhost:8501"
