In [3]:
!pip install streamlit scikit-learn seaborn joblib




In [8]:
import pandas as pd

# Load the dataset
file_path = "Autism-Child-Data.csv"  
df = pd.read_csv(file_path)

# Print column names
print("Columns in dataset:", df.columns)


Columns in dataset: Index(['id', 'A1_Score', 'A2_Score', 'A3_Score', 'A4_Score', 'A5_Score',
       'A6_Score', 'A7_Score', 'A8_Score', 'A9_Score', 'A10_Score', 'age',
       'gender', 'ethnicity', 'jundice', 'austim', 'contry_of_res',
       'used_app_before', 'result', 'age_desc', 'relation', 'Class/ASD'],
      dtype='object')


In [15]:
# Automatically detect the last column as target
target_column = df.columns[-1]

# Convert categorical target values into numerical values (0 or 1)
df[target_column] = df[target_column].map({'YES': 1, 'NO': 0})  # Adjust mapping as needed

# Handle missing values in the target column by assignment instead of inplace
df[target_column] = df[target_column].fillna(0)

# Define X (features) and y (target)
X = df.drop(columns=[target_column])
y = df[target_column].astype(int)


In [37]:
%%writefile app.py
import streamlit as st
import pandas as pd
import joblib
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, classification_report

# ---------------------------
# Predefined Q&A Dictionary
# ---------------------------
sample_qa = {
    "What is ASD?": (
        "ASD (Autism Spectrum Disorder) is a developmental disorder affecting social communication and behavior."
    ),
    "What are symptoms?": (
        "Common ASD symptoms include difficulty with social interactions, repetitive behaviors, and challenges in communication."
    ),
    "How is it diagnosed?": (
        "ASD is diagnosed through behavioral assessments, standardized screening tools, and clinical evaluations."
    ),
    "What is the treatment?": (
        "Treatment for ASD often includes behavioral therapy, speech therapy, occupational therapy, and educational support."
    ),
    "Can it be cured?": (
        "There is no cure for ASD, but early intervention and therapies can help manage symptoms."
    )
}

# ---------------------------
# Helper Functions
# ---------------------------
def chatbot_response(user_input):
    """
    Return a keyword-based response for ASD-related questions.
    """
    text = user_input.lower()
    if "asd" in text:
        return "ASD (Autism Spectrum Disorder) is a developmental disorder affecting social communication and behavior."
    elif "symptom" in text:
        return "Common ASD symptoms include difficulty with social interactions, repetitive behaviors, and challenges in communication."
    elif "diagnos" in text:
        return "ASD is diagnosed through behavioral assessments, standardized screening tools, and clinical evaluations."
    elif "cure" in text:
        return "There is no cure for ASD, but therapies and support can help manage symptoms."
    elif "treat" in text:
        return "Treatment for ASD often includes behavioral therapy, speech therapy, occupational therapy, and educational support."
    else:
        return "Sorry, I don't understand that question. Try asking about ASD symptoms, diagnosis, or treatment."

def preprocess_data(df, target_column):
    """
    Preprocess the dataset:
      1) Drop irrelevant columns if needed (e.g., 'Age_Mons', 'id', 'result').
      2) Convert the target column from 'YES'/'NO' to 1/0 if it's object-type.
      3) Label encode object columns (except target) and store encoders.
    """
    # Drop columns that may leak information
    for col in ['Age_Mons', 'id', 'result']:
        if col in df.columns:
            df.drop(columns=[col], inplace=True)
    
    # Convert target column from YES/NO to 1/0 if needed
    if df[target_column].dtype == 'object':
        df[target_column] = df[target_column].map({'YES': 1, 'NO': 0})
        df[target_column] = df[target_column].fillna(0)
    
    label_encoders = {}
    for col in df.columns:
        if df[col].dtype == 'object' and col != target_column:
            le = LabelEncoder()
            df[col] = le.fit_transform(df[col])
            label_encoders[col] = le
    return df, label_encoders

# ---------------------------
# Streamlit App
# ---------------------------
st.title("Autism Spectrum Disorder (ASD) Detection for Children")

# Upload CSV file
uploaded_file = st.file_uploader("Upload your Autism-Child-Data.csv file", type=["csv"])

if uploaded_file:
    # Load dataset
    df = pd.read_csv(uploaded_file)
    st.subheader("Dataset Preview")
    st.write(df.head())
    
    # Set target column (assumed to be "Class/ASD")
    target_column = "Class/ASD"
    st.write(f"Detected Target Column: **{target_column}**")
    
    # Preprocess Data (remove leaking columns and label encode)
    df_processed, encoders = preprocess_data(df.copy(), target_column)
    
    # Separate features (X) and target (y)
    X = df_processed.drop(columns=[target_column])
    y = df_processed[target_column].astype(int)
    
    # ---------------------------
    # Model Handling: Load or Train Model
    # ---------------------------
    model_file = "asd_model_children.pkl"
    retrain_flag = False
    try:
        model = joblib.load(model_file)
        # Check if model's training features include any leaking columns
        if hasattr(model, "feature_names_in_") and any(leak in model.feature_names_in_ for leak in ["id", "result"]):
            st.warning("Pre-trained model contains leaking columns. Retraining to remove them.")
            retrain_flag = True
    except FileNotFoundError:
        retrain_flag = True
    
    if retrain_flag:
        st.write("🛠 Training new model without leaking columns...")
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        model = RandomForestClassifier(n_estimators=100, random_state=42)
        model.fit(X_train, y_train)
        joblib.dump(model, model_file)
        st.write("✅ Model trained and saved.")
    else:
        st.write("✅ Pre-trained model loaded.")
    
    # ---------------------------
    # Model Evaluation
    # ---------------------------
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    accuracy = model.score(X_test, y_test)
    st.subheader(f"Model Accuracy on Test Split: {accuracy:.2%}")
    
    cv_scores = cross_val_score(model, X, y, cv=5)
    st.write("Cross-Validation Scores:", cv_scores)
    st.write("Mean CV Accuracy:", cv_scores.mean())
    
    preds = model.predict(X_test)
    cm = confusion_matrix(y_test, preds)
    st.write("Confusion Matrix:")
    st.write(cm)
    st.text("Classification Report:\n" + classification_report(y_test, preds))
    
    # ---------------------------
    # Prediction Section
    # ---------------------------
    st.subheader("Make a Prediction")
    user_input = {}
    
    # For each feature in X, create input fields
    for col in X.columns:
        if col in encoders:
            # For categorical features, use selectbox with original labels
            original_categories = list(encoders[col].classes_)
            user_input[col] = st.selectbox(f"Select {col}", original_categories)
        else:
            # For numeric features, use number input
            min_val = float(X[col].min())
            max_val = float(X[col].max())
            median_val = float(X[col].median())
            user_input[col] = st.number_input(f"Enter value for {col}", min_val, max_val, median_val)
    
    if st.button("Predict"):
        # Debug: Display current user input
        st.write("Current user inputs:", user_input)
        input_df = pd.DataFrame([user_input])
        # Safely transform categorical columns
        for col, le in encoders.items():
            if col in input_df.columns:
                user_value = input_df[col].iloc[0]
                valid_classes = set(le.classes_)
                if user_value not in valid_classes:
                    st.error(f"Unseen category '{user_value}' for column '{col}'. Please pick a valid option.")
                    st.stop()
                else:
                    input_df[col] = le.transform(input_df[col])
        
        prediction = model.predict(input_df)[0]
        result_text = "ASD Positive" if prediction == 1 else "ASD Negative"
        st.success(f"Prediction: **{result_text}**")
    
    # ---------------------------
    # Data Visualization Section
    # ---------------------------
    st.subheader("Data Analysis")
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.countplot(x=df[target_column], ax=ax)
    ax.set_title("Distribution of ASD Classification")
    st.pyplot(fig)
    
    # ---------------------------
    # Chatbot Section
    # ---------------------------
    st.subheader("Chatbot - Ask about ASD")
    st.write("Select a common question to get an answer:")
    
    question_options = list(sample_qa.keys())
    selected_question = st.selectbox("Common Questions", question_options)
    
    if st.button("Get Answer"):
        answer = sample_qa[selected_question]
        st.write(answer)
    
    st.markdown("**Disclaimer:** This tool is for educational purposes only and should not replace professional medical advice.")


Overwriting app.py


In [2]:
!streamlit run app.py


^C


In [3]:
!cd

C:\Users\KIIT


In [4]:
import os
print(os.getcwd())  


C:\Users\KIIT


In [5]:
import os
print(os.listdir())  


['.anaconda', '.bito', '.cache', '.conda', '.condarc', '.continuum', '.ipynb_checkpoints', '.ipython', '.jupyter', '.matplotlib', '.mozilla', '.ms-ad', '.nbi', '.oracle_jre_usage', '.profile', '.redhat', '.vivado_hls', '.vscode', '.Xilinx', '1styrresult.pdf', 'ai', 'AICTE Internship profile.pdf', 'anaconda3', 'app.py', 'AppData', 'Application Data', 'archive (3).zip', 'archive (4).zip', 'archive.zip', 'ASD_Project.ipynb', 'Autism-Child-Data.csv', 'Automobile.csv', 'c-prog', 'clg', 'Cloud_Fundamental.pptx[1].pdf', 'Contacts', 'Cookies', 'Documents', 'Downloads', 'Favorites', 'ibm', 'IMG_20241221_192029.jpg', 'IMG_5454.jpeg.jpg', 'IMG_5471.png', 'IMG_5578.jpeg.jpg', 'inAsc.txt', 'inDesc.txt', 'indian_summer_beauty_products.csv', 'inRand.txt', 'IntelGraphicsProfiles', 'Introduction_to_Cloud.pptx[1].pdf', 'KIIT HDFC Payment gateway.pdf', 'LAB1_3629.ipynb', 'LAB2_3629.ipynb', 'LAB3Dictinaryy.ipynb', 'LAB3Dictionary_3629.ipynb', 'LAB5.ipynb', 'LINEAR_REG.ipynb', 'Links', 'Local Settings', 'M

In [26]:
import pandas as pd

# Load the same dataset used by the app
df_full = pd.read_csv("Autism-Child-Data.csv")

# Find a row where the Class/ASD is "YES"
positive_rows = df_full[df_full["Class/ASD"] == "YES"]

if not positive_rows.empty:
    # Take the first known positive row
    known_positive = positive_rows.iloc[0]
    print("Known positive row found:\n", known_positive)
else:
    print("No rows with 'YES' found in the dataset!")


Known positive row found:
 id                             5
A1_Score                       1
A2_Score                       1
A3_Score                       1
A4_Score                       1
A5_Score                       1
A6_Score                       1
A7_Score                       1
A8_Score                       1
A9_Score                       1
A10_Score                      1
age                            5
gender                         m
ethnicity                 Others
jundice                      yes
austim                        no
contry_of_res      United States
used_app_before               no
result                        10
age_desc              4-11 years
relation                  Parent
Class/ASD                    YES
Name: 4, dtype: object
