In [19]:
import pandas as pd

# Load data
try:
	df = pd.read_csv("./diabetes.csv")
except FileNotFoundError:
	print("File not found: data/raw/diabetes.csv. Please check the file path.")
	df = None

if df is not None:
    # Check first rows
    print(df.head())

    # Basic info
    print(df.info())

    # Check for missing values
    print(df.isnull().sum())
else:
    print("Dataframe is None. Skipping data analysis steps.")


   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 768 entries, 0 to 767
Data columns (total 9 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   Pregnancies               768 non-null    int64  
 1   Glucose                   768

In [17]:
import os

# Check current working directory
print("Current working directory:", os.getcwd())

# List files in the directory
print("Files in directory:", os.listdir("data/raw"))

Current working directory: /Users/shubh/early-disease-detection/notebooks


FileNotFoundError: [Errno 2] No such file or directory: 'data/raw'

In [18]:
# The error occurs because the file "early-disease-detection/data/raw/diabetes.csv" was not found,
# so df is set to None. Let's check the correct path and try to load the file again.

# Try alternative paths or prompt user to check the file location
possible_paths = [
    "early-disease-detection/data/raw/diabetes.csv",
    "data/raw/diabetes.csv",
    "./diabetes.csv"
]

for path in possible_paths:
    if os.path.exists(path):
        df = pd.read_csv(path)
        print(f"Loaded data from: {path}")
        print(df.head())
        break
else:
    print("Could not find the diabetes.csv file in any of the expected locations. Please check the file path.")

Loaded data from: ./diabetes.csv
   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \
0            6      148             72             35        0  33.6   
1            1       85             66             29        0  26.6   
2            8      183             64              0        0  23.3   
3            1       89             66             23       94  28.1   
4            0      137             40             35      168  43.1   

   DiabetesPedigreeFunction  Age  Outcome  
0                     0.627   50        1  
1                     0.351   31        0  
2                     0.672   32        1  
3                     0.167   21        0  
4                     2.288   33        1  


In [15]:

# The variable 'path' contains the value './diabetes.csv', which indicates the file was found at this location.
print(f"The file is located at: {path}")

The file is located at: ./diabetes.csv
