In [53]:
# Sample Research: Social Media Impact on Anxiety
## Step 1: Load Data

import pandas as pd

print('=== SEE GENERAL INFORMATION ===\n')

# The CSV is semicolon-delimited. pandas assumes ',' by default, which would
# collapse every row into a single column, so the separator is given explicitly.
df = pd.read_csv('../data/sample.csv', sep=';')

# Display the whole table (all rows).
print(df)

=== SEE GENERAL INFORMATION ===

    patient_id  age  anxiety_score  social_media_time
0            1   20              5                  2
1            2   19              6                  2
2            3   18              3                  1
3            4   24              7                  5
4            5   23              4                  3
5            6   25              7                  4
6            7   22              9                  6
7            8   21              1                  1
8            9   18              1                  1
9           10   26              6                  4
10          11   23              5                  3
11          12   22              4                  3
12          13   22              7                  4
13          14   22              8                  5
14          15   21              3                  2
15          16   25              4                  1
16          17   20              9               

In [54]:
# Preview the first 10 rows of the dataset.

print('First 10 rows:', df.head(10), sep='\n')

First 10 rows:
   patient_id  age  anxiety_score  social_media_time
0           1   20              5                  2
1           2   19              6                  2
2           3   18              3                  1
3           4   24              7                  5
4           5   23              4                  3
5           6   25              7                  4
6           7   22              9                  6
7           8   21              1                  1
8           9   18              1                  1
9          10   26              6                  4


In [55]:
# Preview the last 7 rows of the dataset.

print('Last 7 rows:', df.tail(7), sep='\n')

Last 7 rows:
    patient_id  age  anxiety_score  social_media_time
13          14   22              8                  5
14          15   21              3                  2
15          16   25              4                  1
16          17   20              9                  6
17          18   18              9                  6
18          19   19              3                  2
19          20   19              4                  2


In [56]:
# Basic structural information about the loaded CSV: its dimensions, then
# the per-column report (names, non-null counts, dtypes).

print('\n=== SEE BASIC INFORMATION ===\n')

print(f'Dataset shape: {df.shape}\n')  # (rows, columns)
print('Dataset info:')
# DataFrame.info() writes its report straight to stdout and returns None,
# so it must not be wrapped in print() -- that would emit a stray "None".
df.info()


=== SEE BASIC INFORMATION ===

Dataset shape: (20, 4)

Dataset info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20 entries, 0 to 19
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype
---  ------             --------------  -----
 0   patient_id         20 non-null     int64
 1   age                20 non-null     int64
 2   anxiety_score      20 non-null     int64
 3   social_media_time  20 non-null     int64
dtypes: int64(4)
memory usage: 768.0 bytes
None


In [57]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for each column.

print('\nStatistical summary:', df.describe(), sep='\n')


Statistical summary:
       patient_id        age  anxiety_score  social_media_time
count    20.00000  20.000000      20.000000          20.000000
mean     10.50000  21.350000       5.250000           3.150000
std       5.91608   2.476734       2.489451           1.755443
min       1.00000  18.000000       1.000000           1.000000
25%       5.75000  19.000000       3.750000           2.000000
50%      10.50000  21.500000       5.000000           3.000000
75%      15.25000  23.000000       7.000000           4.250000
max      20.00000  26.000000       9.000000           6.000000


In [59]:
# Split the patients into two age groups so each can be analysed separately:
# those aged AGE_CUTOFF or older, and those younger than AGE_CUTOFF.

print('\n=== FILTERING ===\n')

AGE_CUTOFF = 25  # boundary age; patients exactly this age fall into `adults`

adults = df[df['age'] >= AGE_CUTOFF]
youngs = df[df['age'] < AGE_CUTOFF]

# The labels are built from the same cutoff as the filters, so they state the
# boundary precisely (the first group includes the cutoff age itself).
print(f'Patients aged {AGE_CUTOFF} and over:\n')
print(adults)
print(f'\nPatients under {AGE_CUTOFF}:\n')
print(youngs)


=== FILTERING ===

Patients over 25:

    patient_id  age  anxiety_score  social_media_time
5            6   25              7                  4
9           10   26              6                  4
15          16   25              4                  1

Patients less 25:

    patient_id  age  anxiety_score  social_media_time
0            1   20              5                  2
1            2   19              6                  2
2            3   18              3                  1
3            4   24              7                  5
4            5   23              4                  3
6            7   22              9                  6
7            8   21              1                  1
8            9   18              1                  1
10          11   23              5                  3
11          12   22              4                  3
12          13   22              7                  4
13          14   22              8                  5
14          15   21    

In [50]:
# Diagnostic only: read the file with pandas' default ',' separator to show
# how the header gets parsed. The result is kept in its own variable so the
# correctly parsed `df` used by the other cells is not overwritten (rebinding
# `df` here would make every later df['<column>'] lookup fail on a fresh run).
df_default_sep = pd.read_csv('../data/sample.csv')

print("=== DIAGNOSIS ===")
# A single column name containing ';' means the file is semicolon-delimited.
print("Columns in your CSV:", df_default_sep.columns.tolist())

=== DIAGNOSIS ===
Columns in your CSV: ['patient_id;age;anxiety_score;social_media_time']


In [65]:
# Split patients by anxiety score: 7 and above versus below 7.
high_anxiety = df.loc[df['anxiety_score'].ge(7)]
low_anxiety = df.loc[df['anxiety_score'].lt(7)]

print('\nPatients with high level of anxiety:', high_anxiety, sep='\n')
print('\nPatients with low level of anxiety:', low_anxiety, sep='\n')


Patients with high level of anxiety:
    patient_id  age  anxiety_score  social_media_time
3            4   24              7                  5
5            6   25              7                  4
6            7   22              9                  6
12          13   22              7                  4
13          14   22              8                  5
16          17   20              9                  6
17          18   18              9                  6

Patients with low level of anxiety:
    patient_id  age  anxiety_score  social_media_time
0            1   20              5                  2
1            2   19              6                  2
2            3   18              3                  1
4            5   23              4                  3
7            8   21              1                  1
8            9   18              1                  1
9           10   26              6                  4
10          11   23              5                  3
11     

In [67]:
# Split patients by social media time ("smt" = social_media_time):
# 4 or more versus fewer than 4.
high_smt = df[df['social_media_time'] >= 4]
low_smt = df[df['social_media_time'] < 4]

# Labels match the filters exactly: the first group includes 4 itself.
print('\nPatients who spend 4 or more hours on social media:')
print(high_smt)
print('\nPatients who spend less than 4 hours on social media:')
print(low_smt)


Patients spend more 4 hours in social media:
    patient_id  age  anxiety_score  social_media_time
3            4   24              7                  5
5            6   25              7                  4
6            7   22              9                  6
9           10   26              6                  4
12          13   22              7                  4
13          14   22              8                  5
16          17   20              9                  6
17          18   18              9                  6

Patiens spend less 4 hours in social media:
    patient_id  age  anxiety_score  social_media_time
0            1   20              5                  2
1            2   19              6                  2
2            3   18              3                  1
4            5   23              4                  3
7            8   21              1                  1
8            9   18              1                  1
10          11   23              5           

In [80]:
print('\n=== SELECTING COLUMN SUBSETS ===\n')

# Selecting a single column by label yields a pandas Series.
ages = df.loc[:, 'age']
print('\nAll ages:', ages, sep='\n')


=== SELECTING COLUMN SUBSETS ===


All ages:
0     20
1     19
2     18
3     24
4     23
5     25
6     22
7     21
8     18
9     26
10    23
11    22
12    22
13    22
14    21
15    25
16    20
17    18
18    19
19    19
Name: age, dtype: int64


In [79]:
# Select several columns at once by passing a list of labels (result is a DataFrame).

subset = df.loc[:, ['age', 'anxiety_score']]
print('\nAge and anxiety score:', subset, sep='\n')


Age and anxiety score:
    age  anxiety_score
0    20              5
1    19              6
2    18              3
3    24              7
4    23              4
5    25              7
6    22              9
7    21              1
8    18              1
9    26              6
10   23              5
11   22              4
12   22              7
13   22              8
14   21              3
15   25              4
16   20              9
17   18              9
18   19              3
19   19              4


In [77]:
# Select columns by integer position: every row, column positions 0, 1 and 2.

first_three = df.iloc[:, :3]
print('\nFirst three columns:', first_three, sep='\n')


First three columns:
    patient_id  age  anxiety_score
0            1   20              5
1            2   19              6
2            3   18              3
3            4   24              7
4            5   23              4
5            6   25              7
6            7   22              9
7            8   21              1
8            9   18              1
9           10   26              6
10          11   23              5
11          12   22              4
12          13   22              7
13          14   22              8
14          15   21              3
15          16   25              4
16          17   20              9
17          18   18              9
18          19   19              3
19          20   19              4


In [81]:
# Combine row filtering and column selection in one step.
# A single .loc[row_mask, columns] call replaces the chained df[mask][cols]
# form, which first builds an intermediate frame with every column and then
# indexes it a second time.

high_anxiety_level_patients = df.loc[
    df['anxiety_score'] >= 7,
    ['age', 'social_media_time', 'anxiety_score'],
]
print('\nHigh anxiety patients (selected columns):')
print(high_anxiety_level_patients)


High anxiety patients (selected columns):
    age  social_media_time  anxiety_score
3    24                  5              7
5    25                  4              7
6    22                  6              9
12   22                  4              7
13   22                  5              8
16   20                  6              9
17   18                  6              9


In [82]:
# Write each filtered subset to its own CSV file, omitting the index column.
# ("smt" = social_media_time)
for csv_path, frame in (
    ('../data/high_social_media_time.csv', high_smt),
    ('../data/low_social_media_time.csv', low_smt),
    ('../data/high_anxiety.csv', high_anxiety),
    ('../data/low_anxiety.csv', low_anxiety),
):
    frame.to_csv(csv_path, index=False)