In [None]:
import pandas as pd
from pymongo import MongoClient
import seaborn as sns
import matplotlib.pyplot as plt



# Load the dataset from the CSV file (path is relative to the working directory)
data = pd.read_csv('Resources/Impact_of_Remote_Work_on_Mental_Health.csv')  # Update with your dataset path

# Connect to the local MongoDB server and select the database and collection
# this notebook works with
client = MongoClient('mongodb://localhost:27017/')
db = client['remote_work_db']
collection = db['mental_health']

# Empty the collection first: insert_many always adds new documents, so
# without this every re-run of the cell would store another full copy of
# the dataset and later reads would see each row several times.
collection.delete_many({})

# Insert one document per CSV row. insert_many rejects an empty list, so the
# call is skipped when the CSV contains no rows.
records = data.to_dict('records')
if records:
    collection.insert_many(records)

print("Data loaded successfully!")


In [None]:
# Retrieve every document from the MongoDB collection into a DataFrame.
# No projection is given, so MongoDB's '_id' field comes back as a column too.
data = pd.DataFrame(list(collection.find()))

# Display initial data info.
# DataFrame.info() prints its report itself and returns None, so it must not
# be wrapped in print() (that would emit an extra "None" line).
data.info()

In [None]:
# Tally null entries per column, then show only the columns that have any
missing_count = data.isna().sum()
print(missing_count.loc[missing_count.gt(0)])

In [None]:
# List the distinct values present in the Mental_Health_Condition column
unique_conditions = data.loc[:, 'Mental_Health_Condition'].unique()
print(unique_conditions)


In [5]:
# Fill missing Mental_Health_Condition entries with the literal string 'None'
data['Mental_Health_Condition'] = (
    data['Mental_Health_Condition'].fillna(value='None')
)



In [None]:
# List the distinct values present in the Physical_Activity column
unique_conditions = data.loc[:, 'Physical_Activity'].unique()
print(unique_conditions)

In [7]:
# Fill missing Physical_Activity entries with the literal string 'None'
data['Physical_Activity'] = (
    data['Physical_Activity'].fillna(value='None')
)

In [None]:
# Number of distinct Employee_ID values per mental health condition.
# reset_index(name=...) turns the grouped Series into a two-column frame and
# names the count column in one step.
mental_health_unique_counts = (
    data.groupby('Mental_Health_Condition')['Employee_ID']
    .nunique()
    .reset_index(name='Unique_Employee_Count')
)

# Same tally, grouped by physical activity status
physical_activity_unique_counts = (
    data.groupby('Physical_Activity')['Employee_ID']
    .nunique()
    .reset_index(name='Unique_Employee_Count')
)

# Show each table beneath its heading
print("Unique counts for Mental Health Conditions:", mental_health_unique_counts, sep="\n")
print("\nUnique counts for Physical Activity:", physical_activity_unique_counts, sep="\n")


In [None]:
# Per-column count of null entries across the whole frame
print(data.isna().sum())

In [None]:
# Check for duplicates.
# The frame was read back from MongoDB, where every stored document gets its
# own '_id'; with that column included no two rows can ever compare equal and
# the count would always be 0. Compare on the remaining columns only.
# errors='ignore' keeps the check working if '_id' is not present.
duplicates = data.drop(columns=['_id'], errors='ignore').duplicated().sum()
print(f"Number of duplicate records: {duplicates}")

In [11]:
# Work on an independent copy of the data, re-indexed from 0 with the old
# index discarded
data_cleaned = data.copy().reset_index(drop=True)

In [None]:
# Build both summaries first: the default describe() for the numerical
# columns, and an object-dtype describe() for the categorical columns
summary_stats = data_cleaned.describe()
categorical_summary = data_cleaned.describe(include='object')

# Print the numerical summary followed by the categorical one
print(summary_stats, categorical_summary, sep="\n")


In [None]:
# Report the frame's dimensions
print(f"Shape of the dataset: {data_cleaned.shape}")

# Then the dtype of each column, followed by the first few rows
print(data_cleaned.dtypes, data_cleaned.head(), sep="\n")
