In [1]:
from src.task_a import (
    load_data,
    get_demographic_info,
    get_med_hist_by_ethnicity,
    long_survivors,
    high_risk_patients,
)
from src.task_b import (
    load_dataframe,
    top_treatments_by_ethnicity,
    avg_wbc_by_treatment,
    smoking_analysis,
    survival_and_haemoglobin_by_ethnicity,
)
from src.task_c import (
    treatment_pie_chart,
    smoking_trend,
    blood_pressure_comparison,
    survival_by_stage,
)

In [2]:
def _display_result(result):
    """Print the value returned by a menu action.

    - ``None`` prints nothing.
    - ``list`` / ``dict`` values are printed with their plain repr.
    - Anything else is printed via ``result.to_string(index=False)`` when that
      call succeeds (the pandas DataFrame/Series case), otherwise with ``print``.
    """
    if result is None:
        return

    if isinstance(result, (list, dict)):
        print("\n Result:")
        print(result)
        return

    print("\nResult:")
    try:
        print(result.to_string(index=False))
    except Exception:
        # Best-effort fallback for objects without a compatible to_string().
        # `Exception` (not a bare except) so KeyboardInterrupt/SystemExit
        # still propagate instead of being silently swallowed.
        print(result)


def main():
    """Run the interactive lung-cancer analysis menu.

    Prompts for a CSV path, loads it once with ``load_data`` and once with
    ``load_dataframe``, then loops over a numbered menu until the user picks
    the exit entry. Each menu action is a zero-argument callable; whatever it
    returns is printed by ``_display_result``.

    Returns:
        None. All results and error messages are written to stdout (tracebacks
        of failing actions go to stderr).
    """
    import traceback  # local import: only used to report failing menu actions

    file_path = input("Enter file path to the CSV file: ").strip()

    # Validate the first load before starting the second one, so a bad path
    # fails fast instead of parsing the file twice.
    data = load_data(file_path)  # expected to be a list of dicts
    if not data:
        print("Failed to load CSV data.")
        return
    print(f"Loaded {len(data)} records from CSV.")

    df = load_dataframe(file_path)  # expected to be a pandas DataFrame
    if df is None or df.empty:
        print("Failed to load DataFrame.")
        return
    print(f"DataFrame loaded with shape {df.shape}.")

    # Menu: key -> (label, zero-argument action). An action of None means "exit".
    # NOTE(review): the labels of options 8 and 12 do not obviously match the
    # functions they call (survival_and_haemoglobin_by_ethnicity /
    # survival_by_stage) - confirm the intended wording.
    menu_options = {
        "1": ("Retrieve Demographic Info", lambda: get_demographic_info(data)),
        "2": ("Retrieve Medical History", lambda: get_med_hist_by_ethnicity(data)),
        "3": ("Retrieve Long Survival by Treatment", lambda: long_survivors(data)),
        "4": ("Retrieve Custom Info", lambda: high_risk_patients(data)),
        "5": ("Top 3 Treatments", lambda: top_treatments_by_ethnicity(df)),
        "6": ("Average WBC Count", lambda: avg_wbc_by_treatment(df)),
        "7": ("Average Smoking Packs", lambda: smoking_analysis(df)),
        "8": ("Survival by Stage", lambda: survival_and_haemoglobin_by_ethnicity(df)),
        "9": ("Visualize Treatment Proportion", lambda: treatment_pie_chart(df)),
        "10": ("Visualize Smoking Packs by Stage", lambda: smoking_trend(df)),
        "11": ("Visualize Blood Pressure", lambda: blood_pressure_comparison(df)),
        "12": ("Visualize Effectiveness by Age", lambda: survival_by_stage(df)),
        "13": ("Exit", None),
    }

    # The menu never changes, so build its numerically ordered lines once.
    menu_lines = [
        f"{key}. {desc}"
        for key, (desc, _) in sorted(menu_options.items(), key=lambda item: int(item[0]))
    ]

    while True:
        print("\nLung Cancer Data Analysis Menu:")
        for line in menu_lines:
            print(line)

        choice = input("\nEnter your choice: ").strip()
        action = menu_options.get(choice)

        if action is None:
            print("Invalid choice. Please try again.")
            continue

        desc, func = action
        if func is None:
            # The exit entry is identified by its missing action, not by a
            # hard-coded key, so renumbering the menu cannot break it.
            print("Exit")
            break

        try:
            result = func()
        except Exception as exc:
            # One failing analysis must not end the session (the user would
            # have to re-enter the path and reload everything).
            print(f"Error while running '{desc}': {exc}")
            traceback.print_exc()
            continue

        _display_result(result)


# Start the interactive session: main() prompts for the CSV path and then
# loops over the menu until option 13 (Exit) is chosen.
main()

Loaded 23658 records from CSV.
DataFrame loaded with shape (23658, 38).

Lung Cancer Data Analysis Menu:
1. Retrieve Demographic Info
2. Retrieve Medical History
3. Retrieve Long Survival by Treatment
4. Retrieve Custom Info
5. Top 3 Treatments
6. Average WBC Count
7. Average Smoking Packs
8. Survival by Stage
9. Visualize Treatment Proportion
10. Visualize Smoking Packs by Stage
11. Visualize Blood Pressure
12. Visualize Effectiveness by Age
13. Exit

 Result:
{'Age': '54', 'Gender': 'Female', 'Smoking_History': 'Former Smoker', 'Ethnicity': 'Asian'}

Lung Cancer Data Analysis Menu:
1. Retrieve Demographic Info
2. Retrieve Medical History
3. Retrieve Long Survival by Treatment
4. Retrieve Custom Info
5. Top 3 Treatments
6. Average WBC Count
7. Average Smoking Packs
8. Survival by Stage
9. Visualize Treatment Proportion
10. Visualize Smoking Packs by Stage
11. Visualize Blood Pressure
12. Visualize Effectiveness by Age
13. Exit

 Result:
[{'Patient_ID': '18647', 'Family_History': 'No',

In [3]:
# Load the dataset again at notebook (global) scope: the `data` and `df`
# created inside main() are local to that function, so later cells cannot
# use them.
# NOTE(review): unlike main(), this cell does not check for an empty or
# failed load before continuing - confirm that is acceptable.
file_path = input("Enter file path to the CSV file: ").strip()

data = load_data(file_path)  # should return list of dicts
df = load_dataframe(file_path)  # should return pandas DataFrame

In [4]:
# Summarise the loaded DataFrame: entry count, column names, non-null counts
# and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23658 entries, 0 to 23657
Data columns (total 38 columns):
 #   Column                            Non-Null Count  Dtype  
---  ------                            --------------  -----  
 0   Patient_ID                        23658 non-null  int64  
 1   Age                               23658 non-null  int64  
 2   Gender                            23658 non-null  object 
 3   Smoking_History                   23658 non-null  object 
 4   Tumor_Size_mm                     23658 non-null  float64
 5   Tumor_Location                    23658 non-null  object 
 6   Stage                             23658 non-null  object 
 7   Treatment                         23658 non-null  object 
 8   Survival_Months                   23658 non-null  int64  
 9   Ethnicity                         23658 non-null  object 
 10  Insurance_Type                    23658 non-null  object 
 11  Family_History                    23658 non-null  object 
 12  Como