In [1]:
import pandas as pd
import re
from scipy.stats import pearsonr

In [2]:
# Read the dataset
data = pd.read_csv("DataSet/depression_scale_PHQ.csv")


In [3]:
# Extract numeric values from scores within parentheses
def extracting_score(score_string):
    match = re.search(r'\((\d+)\)', score_string)
    return int(match.group(1)) if match else 0  # Ternary operator 

In [4]:
# Convert string scores to numeric values using apply and lambda
data[data.columns[9:]] = data[data.columns[9:]].apply(lambda col: col.apply(extracting_score))

In [5]:
# Calculate collective PHQ-9 score using sum across columns
data['PHQ9_score'] = data.iloc[:, 9:].sum(axis=1)

In [6]:
# Classify individuals into different depression states using cut
bins = [-1, 4, 9, 14, 19, 27, float('inf')]  # Define bins for depression levels
labels = ['No depression', 'Mild depression', 'Moderate depression', 'Moderately severe depression',
          'Severe depression', 'Invalid score']
data['Depression_state'] = pd.cut(data['PHQ9_score'], bins=bins, labels=labels, right=False)

In [7]:
# Count the number of people in each state of depression
depression_counts = data['Depression_state'].value_counts()

In [8]:
# Convert age range strings to numeric values using map
data['Age_midpoint'] = data['Age range in years'].map({'Above 30': 35, '15-30': 22.5})

In [9]:
# Calculate correlation coefficient between Age range and PHQ-9 collective score
correlation_coefficient, _ = pearsonr(data['Age_midpoint'], data['PHQ9_score'])

In [10]:
# Print results
print("Number of people in each state of depression:")
print(depression_counts)
print("\nCorrelation coefficient between Age range and PHQ-9 collective score:", correlation_coefficient)


Number of people in each state of depression:
Depression_state
Mild depression                 279
Moderate depression             219
Moderately severe depression     88
No depression                    70
Severe depression                16
Invalid score                     0
Name: count, dtype: int64

Correlation coefficient between Age range and PHQ-9 collective score: 0.00416829313887653
