In [1]:
!pip install streamlit scikit-learn seaborn joblib




In [8]:
import pandas as pd

# Read the child-autism CSV; the relative path is resolved against the
# kernel's working directory.
file_path = "Autism-Child-Data.csv"
df = pd.read_csv(filepath_or_buffer=file_path)

# Show the column index of the loaded frame.
print("Columns in dataset: " + str(df.columns))


Columns in dataset: Index(['id', 'A1_Score', 'A2_Score', 'A3_Score', 'A4_Score', 'A5_Score',
       'A6_Score', 'A7_Score', 'A8_Score', 'A9_Score', 'A10_Score', 'age',
       'gender', 'ethnicity', 'jundice', 'austim', 'contry_of_res',
       'used_app_before', 'result', 'age_desc', 'relation', 'Class/ASD'],
      dtype='object')


In [15]:
# Treat the last column of the dataset as the target.
target_column = df.columns[-1]


def _encode_label(value):
    """Return 1 for a 'YES' label (any case, surrounding whitespace ignored)
    or an already-encoded 1; return 0 for everything else, including
    missing values."""
    if isinstance(value, str):
        return int(value.strip().upper() == 'YES')
    return int(value == 1)


# Encode the target as 0/1. A plain {'YES': 1, 'NO': 0} dict lookup is not
# re-runnable: on an already-encoded column it maps every 1/0 to NaN, and the
# following fillna(0) would silently turn all labels into 0. Mapping through
# _encode_label gives the same result no matter how often this cell is run.
df[target_column] = df[target_column].map(_encode_label).astype(int)

# Define X (features) and y (target).
# NOTE(review): X still contains 'id' and 'result', which app.py's
# preprocess_data drops as columns that may leak information - confirm before
# training a model on this X.
X = df.drop(columns=[target_column])
y = df[target_column].astype(int)


In [37]:
%%writefile app.py
import os

import joblib
import matplotlib.pyplot as plt
import numpy as np
import openai
import pandas as pd
import seaborn as sns
import streamlit as st
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import LabelEncoder

# ---------------------------
# OpenAI API key
# Read from the OPENAI_API_KEY environment variable so the secret never has
# to be written into this file. The original placeholder is kept as the
# fallback so the script still starts when the variable is not set.
# ---------------------------
openai.api_key = os.environ.get("OPENAI_API_KEY", "YOUR_OPENAI_API_KEY")

# ---------------------------
# Updated CSS to match header's pastel gradient
# ---------------------------
# Stylesheet for the whole app, kept in a named constant and injected once
# as raw HTML (hence unsafe_allow_html=True).
PASTEL_THEME_CSS = """
    <style>
    @import url('https://fonts.googleapis.com/css2?family=Nunito:wght@400;600&display=swap');

    /* Global pastel gradient background (pink to orange/yellow) */
    html, body, .stApp {
        height: 100% !important;
        margin: 0 !important;
        padding: 0 !important;
        /* Replace these colors if you want a slightly different pastel blend */
        background: linear-gradient(135deg, #FFC1CC, #FFD3FF, #FFFACD) !important;
        background-size: cover !important;
        background-repeat: no-repeat !important;
        color: #333 !important;
        font-family: 'Nunito', sans-serif !important;
    }

    /* Main content container: semi-transparent white so gradient shows behind */
    .main .block-container {
        background: rgba(255, 255, 255, 0.7) !important;
        border: 1px solid #ddd !important;
        border-radius: 8px !important;
        box-shadow: 0 2px 5px rgba(0,0,0,0.05) !important;
        padding: 2rem !important;
        margin: 2rem auto !important;
    }

    /* Sidebar with a reversed pastel gradient for harmony */
    .sidebar .sidebar-content {
        background: linear-gradient(135deg, #FFFACD, #FFD3FF, #FFC1CC) !important;
        border-right: 1px solid #ddd !important;
        padding-top: 2rem !important;
        padding-left: 1rem !important;
        padding-right: 1rem !important;
        color: #333 !important;
    }

    .sidebar h2 {
        font-size: 1.3rem !important;
        font-weight: 600 !important;
        margin-bottom: 1rem;
        color: #111 !important;
    }

    /* Headings in a darker shade for contrast */
    h1, h2, h3, h4, h5, h6 {
        font-family: 'Nunito', sans-serif !important;
        color: #111 !important;
        margin-top: 0.5rem;
        margin-bottom: 0.5rem;
    }

    /* Buttons with pastel gradient to match header */
    .stButton>button {
        background: linear-gradient(135deg, #FFC1CC, #FFD3FF, #FFFACD) !important;
        color: #333 !important;
        border: 1px solid #ccc;
        border-radius: 5px;
        font-weight: 600;
        font-size: 1rem;
        padding: 0.6rem 1rem;
        cursor: pointer;
    }
    .stButton>button:hover {
        background: linear-gradient(135deg, #FFFACD, #FFD3FF, #FFC1CC) !important;
        color: #111 !important;
    }

    .stTextInput>div>div>input, .stSelectbox>div>div>div>input, .stNumberInput input {
        background-color: #FFFFFF !important;
        color: #333 !important;
        border-radius: 5px;
        border: 1px solid #ccc !important;
    }

    .dataframe {
        border: 1px solid #ccc !important;
        border-radius: 5px;
        margin-bottom: 1rem;
    }

    .mpl-figure, .mpl-figure-zoom, .mpl-figure-tooltip {
        background-color: rgba(255, 255, 255, 0.7) !important;
    }

    /* Bordered-text class for interactive boxes */
    .bordered-text {
        border: 1px solid #ccc;
        border-radius: 6px;
        padding: 1rem;
        background-color: rgba(255, 255, 255, 0.8);
        margin-bottom: 1rem;
    }
    </style>
"""
st.markdown(PASTEL_THEME_CSS, unsafe_allow_html=True)

# ---------------------------
# Helper Functions
# ---------------------------
def preprocess_data(df, target_column):
    """
    Prepare a raw ASD screening table for model training.

    The work is done on a copy, so the DataFrame passed in by the caller is
    never modified.

    Steps:
      1. Strip whitespace from and lowercase every object-dtype (text) column,
         so that e.g. ' YES' and 'yes' become the same category.
      2. Drop 'Age_Mons', 'id' and 'result' when present (columns that may
         leak information about the label).
      3. If the target column is still text, map 'yes' -> 1 and 'no' -> 0.
         Any other or missing label becomes 0.
      4. Label-encode every remaining object-dtype feature column.

    Parameters:
        df (pandas.DataFrame): Raw dataset including the target column.
        target_column (str): Name of the label column.

    Returns:
        tuple: (processed DataFrame, dict mapping each encoded feature column
        name to its fitted LabelEncoder).

    Raises:
        KeyError: If ``target_column`` is not a column of ``df``.
    """
    if target_column not in df.columns:
        raise KeyError(f"Target column '{target_column}' not found in the dataset.")

    # Work on a private copy so the caller's frame is left untouched.
    df = df.copy()

    # 1) Unify text columns
    for col in df.select_dtypes(include='object').columns:
        df[col] = df[col].str.strip().str.lower()

    # 2) Drop columns that may leak information
    leaking_columns = [col for col in ('Age_Mons', 'id', 'result') if col in df.columns]
    df = df.drop(columns=leaking_columns)

    # 3) Convert target column from yes/no to 1/0 if needed.
    #    Labels outside the mapping come back as NaN and are counted as 0.
    if df[target_column].dtype == 'object':
        df[target_column] = (
            df[target_column].map({'yes': 1, 'no': 0}).fillna(0).astype(int)
        )

    # 4) Label encode the remaining categorical columns, keeping each fitted
    #    encoder so callers can translate user-facing categories later.
    label_encoders = {}
    for col in df.columns:
        if col != target_column and df[col].dtype == 'object':
            encoder = LabelEncoder()
            df[col] = encoder.fit_transform(df[col])
            label_encoders[col] = encoder

    return df, label_encoders

def generate_gpt_response(user_input):
    """
    Ask the OpenAI chat model a question about ASD and return its answer.

    Works with both interfaces of the ``openai`` package: the module-level
    ``openai.chat.completions.create`` of openai >= 1.0, and the legacy
    ``openai.ChatCompletion.create`` of openai < 1.0 (removed in 1.0).

    Parameters:
        user_input (str): The question typed by the user.

    Returns:
        str: The model's reply with surrounding whitespace removed, or
        ``"Error: <message>"`` when the request fails for any reason.
    """
    request = {
        "model": "gpt-3.5-turbo",
        "messages": [
            {"role": "system", "content": "You are an expert on Autism Spectrum Disorder. Provide helpful, accurate information."},
            {"role": "user", "content": user_input},
        ],
        "max_tokens": 200,
        "temperature": 0.7,
    }
    try:
        # The OpenAI client class only exists in openai >= 1.0, where the
        # legacy ChatCompletion entry point raises instead of answering.
        if hasattr(openai, "OpenAI"):
            response = openai.chat.completions.create(**request)
        else:
            response = openai.ChatCompletion.create(**request)
        content = response.choices[0].message.content
        # content may be None; treat that as an empty reply.
        return (content or "").strip()
    except Exception as e:
        # Deliberate best-effort: the UI shows the error text instead of crashing.
        return f"Error: {str(e)}"

# ---------------------------
# Streamlit App Navigation
# ---------------------------
# Sidebar radio selects which page of the app is rendered on this run.
pages = [
    "Welcome",
    "Instructions",
    "Sample Dataset",
    "Prediction",
    "AI Chatbot",
]
page = st.sidebar.radio("Navigation", pages)

# ---------------------------
# 1) Welcome Page
# ---------------------------
if page == "Welcome":
    # Static introduction text, rendered as raw HTML inside the bordered box.
    welcome_html = """
    <div class="bordered-text">
    <strong>What is ASD?</strong><br><br>
    Autism Spectrum Disorder (ASD) is a developmental condition that impacts how individuals communicate, behave, and interact with the world around them. 
    It is called a "spectrum" because symptoms and severity can vary widely from person to person.<br><br>

    <strong>ASD in Children:</strong><br><br>
    Children with ASD often show signs like difficulty in social interactions, repetitive behaviors, and sensitivity to sensory inputs. 
    Early intervention can greatly improve outcomes by providing support with communication, social skills, and learning strategies.<br><br>

    <strong>ASD in Adults:</strong><br><br>
    Many adults with ASD lead fulfilling lives, but they may continue to experience challenges in social relationships, 
    employment, and daily functioning. Proper support, accommodations, and understanding can help them thrive.<br><br>

    <strong>How This Web App Helps:</strong><br><br>
    This web application assists in the <em>early detection</em> of ASD, particularly for children, by using data analysis and machine learning. 
    It allows you to upload a dataset, train or load a pre-trained model, and then make predictions about whether a child might show signs of ASD. 
    Additionally, it features an AI-powered chatbot that can answer questions about ASD, its symptoms, and possible interventions.
    </div>
    """

    st.image("ASD_Header.png", use_container_width=True)
    st.title("Welcome to the ASD Detection Web App")
    st.markdown(welcome_html, unsafe_allow_html=True)

# ---------------------------
# 2) Instructions Page
# ---------------------------
elif page == "Instructions":
    st.title("Instructions")
    st.markdown("""
    <div class="bordered-text">
    <strong>How to Use this Web App:</strong><br><br>
    1. <strong>Sample Dataset:</strong><br>
       Go to the 'Sample Dataset' page to preview and download a sample dataset on child autism.<br><br>
    2. <strong>Prediction:</strong><br>
       On the 'Prediction' page, upload your dataset (CSV format) or use the sample data. 
       The app will preprocess the data, load or train a RandomForest model, evaluate its performance, and allow you to make predictions.<br><br>
    3. <strong>AI Chatbot:</strong><br>
       The 'AI Chatbot' page features an AI-powered chatbot where you can ask any questions related to ASD and receive informative responses.<br><br>
    <em>Note:</em> This tool is for educational purposes only and should not replace professional medical advice.
    </div>
    """, unsafe_allow_html=True)

# ---------------------------
# 3) Sample Dataset Page
# ---------------------------
elif page == "Sample Dataset":
    st.title("Sample Child Autism Dataset")
    st.markdown("Download this sample dataset to test the ASD Detection System.")

    try:
        # Make sure 'Autism-Child-Data.csv' is in the same folder as this script
        df_sample = pd.read_csv("Autism-Child-Data.csv")
        st.write(df_sample.head())

        # Convert the DataFrame to CSV for download
        csv_data = df_sample.to_csv(index=False)
        st.download_button(
            label="Download Child Autism CSV",
            data=csv_data,
            file_name="Autism-Child-Data.csv",
            mime="text/csv"
        )
    except FileNotFoundError:
        st.error("Autism-Child-Data.csv not found. Please place it in the same directory as this app.")

# ---------------------------
# 4) Prediction Page
# ---------------------------
elif page == "Prediction":
    st.title("ASD Prediction for Children")
    uploaded_file = st.file_uploader("Upload your Autism-Child-Data.csv file", type=["csv"])
    
    if uploaded_file:
        df = pd.read_csv(uploaded_file)
        st.subheader("Dataset Preview")
        st.write(df.head())
        
        target_column = "Class/ASD"
        st.write(f"Detected Target Column: **{target_column}**")
        
        # Preprocess data and prepare for model training
        df_processed, encoders = preprocess_data(df.copy(), target_column)
        X = df_processed.drop(columns=[target_column])
        y = df_processed[target_column].astype(int)
        
        model_file = "asd_model_children.pkl"
        retrain_flag = False
        try:
            model = joblib.load(model_file)
            # Check for leaking columns in the pre-trained model
            if hasattr(model, "feature_names_in_") and any(leak in model.feature_names_in_ for leak in ["id", "result"]):
                st.warning("Pre-trained model contains leaking columns. Retraining to remove them.")
                retrain_flag = True
        except FileNotFoundError:
            retrain_flag = True
        
        if retrain_flag:
            st.write("🛠 Training new model without leaking columns...")
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.2, random_state=42
            )
            model = RandomForestClassifier(n_estimators=100, random_state=42)
            model.fit(X_train, y_train)
            joblib.dump(model, model_file)
            st.write("✅ Model trained and saved.")
        else:
            st.write("✅ Pre-trained model loaded.")
        
        # ---------------------------
        # Model Evaluation (Bar Chart + Heatmap)
        # ---------------------------
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        accuracy = model.score(X_test, y_test)
        st.subheader(f"Model Accuracy on Test Split: {accuracy:.2%}")

        # Cross-Validation Scores (Bar Chart)
        cv_scores = cross_val_score(model, X, y, cv=5)
        st.markdown("**Cross-Validation Scores:**")
        fig_cv, ax_cv = plt.subplots()
        ax_cv.bar(range(len(cv_scores)), cv_scores, color='#bbb')
        ax_cv.set_xlabel("Fold")
        ax_cv.set_ylabel("CV Score")
        ax_cv.set_ylim([0, 1])
        ax_cv.set_title("Cross-Validation Scores")
        st.pyplot(fig_cv)

        mean_cv = cv_scores.mean()
        st.markdown(f"**Mean CV Accuracy:** {mean_cv:.2%}")

        # Confusion Matrix (Heatmap)
        preds = model.predict(X_test)
        cm = confusion_matrix(y_test, preds)
        st.markdown("**Confusion Matrix (Heatmap):**")
        fig_cm, ax_cm = plt.subplots()
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax_cm)
        ax_cm.set_xlabel("Predicted")
        ax_cm.set_ylabel("Actual")
        st.pyplot(fig_cm)

        # Classification Report (Styled DataFrame)
        st.markdown("**Classification Report:**")
        report_dict = classification_report(y_test, preds, output_dict=True)
        df_report = pd.DataFrame(report_dict).transpose()
        st.dataframe(df_report.style.background_gradient(cmap='Blues', axis=1))

        # ---------------------------
        # Prediction Inputs
        # ---------------------------
        st.subheader("Make a Prediction")
        user_input = {}
        for col in X.columns:
            if col in encoders:
                original_categories = list(encoders[col].classes_)
                user_input[col] = st.selectbox(f"Select {col}", original_categories)
            else:
                min_val = float(X[col].min())
                max_val = float(X[col].max())
                median_val = float(X[col].median())
                user_input[col] = st.number_input(f"Enter value for {col}", min_val, max_val, median_val)
        
        if st.button("Predict"):
            st.write("Current user inputs:", user_input)
            input_df = pd.DataFrame([user_input])
            for col, le in encoders.items():
                if col in input_df.columns:
                    user_value = input_df[col].iloc[0]
                    valid_classes = set(le.classes_)
                    if user_value not in valid_classes:
                        st.error(f"Unseen category '{user_value}' for column '{col}'. Please pick a valid option.")
                        st.stop()
                    else:
                        input_df[col] = le.transform(input_df[col])
            prediction = model.predict(input_df)[0]
            result_text = "ASD Positive" if prediction == 1 else "ASD Negative"
            st.success(f"Prediction: **{result_text}**")

        # ---------------------------
        # Data Analysis (Count Plot + Optional Correlation Heatmap)
        # ---------------------------
        st.subheader("Data Analysis")
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.countplot(x=df[target_column], ax=ax)
        ax.set_title("Distribution of ASD Classification")
        st.pyplot(fig)

        st.markdown("**Correlation Heatmap:**")
        corr = df_processed.corr()
        fig_corr, ax_corr = plt.subplots(figsize=(10, 6))
        sns.heatmap(corr, ax=ax_corr, cmap="coolwarm", annot=False)
        ax_corr.set_title("Correlation Heatmap of Processed Features")
        st.pyplot(fig_corr)

# ---------------------------
# 5) AI Chatbot Page
# ---------------------------
elif page == "AI Chatbot":
    st.title("AI-Powered Chatbot")
    st.markdown("Ask any questions related to Autism Spectrum Disorder (ASD) and get responses powered by AI.")

    user_query = st.text_input("Your Question:")
    if st.button("Send"):
        if user_query:
            with st.spinner("Generating response..."):
                response = generate_gpt_response(user_query)
            st.markdown("**Chatbot:**")
            st.write(response)
        else:
            st.error("Please enter a question.")
    
    st.markdown("**Disclaimer:** This chatbot is for informational purposes only and should not replace professional medical advice.")


Overwriting app.py


In [2]:
# Launch the Streamlit app written by the %%writefile cell, as a shell command.
# The recorded output of this cell is "^C", i.e. the run was interrupted.
!streamlit run app.py


^C


In [3]:
# Shell `cd` with no argument; in the recorded run it printed the current
# directory (a Windows path).
!cd

C:\Users\KIIT


In [4]:
import os

# Show the directory the kernel is running in.
working_dir = os.getcwd()
print(working_dir)


C:\Users\KIIT


In [5]:
import os

# List the entries of the working directory - presumably to check that app.py
# and Autism-Child-Data.csv are there.
print(os.listdir("."))


['.anaconda', '.bito', '.cache', '.conda', '.condarc', '.continuum', '.ipynb_checkpoints', '.ipython', '.jupyter', '.matplotlib', '.mozilla', '.ms-ad', '.nbi', '.oracle_jre_usage', '.profile', '.redhat', '.vivado_hls', '.vscode', '.Xilinx', '1styrresult.pdf', 'ai', 'AICTE Internship profile.pdf', 'anaconda3', 'app.py', 'AppData', 'Application Data', 'archive (3).zip', 'archive (4).zip', 'archive.zip', 'ASD_Project.ipynb', 'Autism-Child-Data.csv', 'Automobile.csv', 'c-prog', 'clg', 'Cloud_Fundamental.pptx[1].pdf', 'Contacts', 'Cookies', 'Documents', 'Downloads', 'Favorites', 'ibm', 'IMG_20241221_192029.jpg', 'IMG_5454.jpeg.jpg', 'IMG_5471.png', 'IMG_5578.jpeg.jpg', 'inAsc.txt', 'inDesc.txt', 'indian_summer_beauty_products.csv', 'inRand.txt', 'IntelGraphicsProfiles', 'Introduction_to_Cloud.pptx[1].pdf', 'KIIT HDFC Payment gateway.pdf', 'LAB1_3629.ipynb', 'LAB2_3629.ipynb', 'LAB3Dictinaryy.ipynb', 'LAB3Dictionary_3629.ipynb', 'LAB5.ipynb', 'LINEAR_REG.ipynb', 'Links', 'Local Settings', 'M

In [26]:
import pandas as pd

# Re-read the CSV that the app uses.
df_full = pd.read_csv("Autism-Child-Data.csv")

# Keep only the rows labelled "YES" in Class/ASD.
is_positive = df_full["Class/ASD"].eq("YES")
positive_rows = df_full.loc[is_positive]

if positive_rows.empty:
    print("No rows with 'YES' found in the dataset!")
else:
    # The first positive row serves as a known-positive example.
    known_positive = positive_rows.iloc[0]
    print("Known positive row found:\n", known_positive)


Known positive row found:
 id                             5
A1_Score                       1
A2_Score                       1
A3_Score                       1
A4_Score                       1
A5_Score                       1
A6_Score                       1
A7_Score                       1
A8_Score                       1
A9_Score                       1
A10_Score                      1
age                            5
gender                         m
ethnicity                 Others
jundice                      yes
austim                        no
contry_of_res      United States
used_app_before               no
result                        10
age_desc              4-11 years
relation                  Parent
Class/ASD                    YES
Name: 4, dtype: object
