# Exploring the Extrovert-Introvert Behavior Dataset via JupyterLab and Google Cloud BigQuery
---

In [1]:
# Initialize the Big Query Client and Auth
from google.cloud import bigquery, storage
import os

# Set up authentication (ensure you have a service account key).
# The path below is only a fallback: if GOOGLE_APPLICATION_CREDENTIALS is already
# set in the environment it is left untouched, so the notebook does not clobber
# credentials configured outside of it (e.g. on a machine that keeps the key elsewhere).
os.environ.setdefault(
    "GOOGLE_APPLICATION_CREDENTIALS",
    "../env/ccbd-exam-2025-darnall-939537d72960.json",
)

# Initialize BigQuery and Cloud Storage clients.
# Neither client is given explicit credentials, so both rely on the environment configured above.
bq_client = bigquery.Client()
gcs_client = storage.Client()

In [15]:
# Connect to the BigQuery and bucket assets
dataset_id = "ccbd-exam-2025-darnall.intro_extro_behavior"
table_id = "intro_extro_data"
# gcs_uri = "gs://ccbd-exam-2025-darnall-gc-bucket/datasets/introvert-vs-extrovert-behavior/personality_dataset.csv"

# Assess whether posting less frequently means you are more introverted (as strange as it sounds):
# count, per personality type, the rows whose Post_frequency is below 5.
# The table reference is built from dataset_id / table_id so it is defined in exactly one place.
query = f"""
SELECT Personality, COUNT(*) as Post_Frequency 
FROM `{dataset_id}.{table_id}` 
where Post_frequency < 5 
group by Personality;
"""

# Run the query and load the grouped result into a DataFrame
query_1 = bq_client.query(query).to_dataframe()
query_1.head()



Unnamed: 0,Personality,Post_Frequency
0,Introvert,1305
1,Extrovert,485


In [16]:
# Assess how little 'social life' each personality type has:
# count, per personality type, the rows whose Social_event_attendance is at most 2.
# (The result contains both introverts and extroverts, since it is grouped by Personality.)
# The table reference reuses dataset_id / table_id defined earlier in the notebook.
query = f"""
SELECT Personality, COUNT(*) as Social_Event_Attendance 
FROM `{dataset_id}.{table_id}` 
where Social_event_attendance <= 2 
group by Personality;
"""

# Run the query and load the grouped result into a DataFrame
query_2 = bq_client.query(query).to_dataframe()
query_2.head()



Unnamed: 0,Personality,Social_Event_Attendance
0,Introvert,1004
1,Extrovert,104


In [17]:
# Assess how many individuals from both personality types are in the 'mean' amount of outdoors time
# (so if they could be considered similar or not): count, per personality type, the rows with Going_outside = 3.
# The table reference reuses dataset_id / table_id defined earlier in the notebook.
query = f"""
SELECT Personality, COUNT(*) as Outdoors_Time 
FROM `{dataset_id}.{table_id}` 
where Going_outside = 3 
group by Personality;
"""

# Run the query and load the grouped result into a DataFrame
query_3 = bq_client.query(query).to_dataframe()
query_3.head()



Unnamed: 0,Personality,Outdoors_Time
0,Extrovert,183
1,Introvert,26


In [18]:
# Assess how many individuals from both personality types report a Going_outside value of 2
# (so the split can be compared against the other Going_outside values).
# The table reference reuses dataset_id / table_id defined earlier in the notebook.
query = f"""
SELECT Personality, COUNT(*) as Outdoors_Time 
FROM `{dataset_id}.{table_id}` 
where Going_outside = 2
group by Personality;
"""

# Run the query and load the grouped result into a DataFrame
query_4 = bq_client.query(query).to_dataframe()
query_4.head()



Unnamed: 0,Personality,Outdoors_Time
0,Introvert,415
1,Extrovert,41


In [19]:
# Assess how many individuals from both personality types report a Going_outside value of 1
# (so the split can be compared against the other Going_outside values).
# The table reference reuses dataset_id / table_id defined earlier in the notebook.
query = f"""
SELECT Personality, COUNT(*) as Outdoors_Time 
FROM `{dataset_id}.{table_id}` 
where Going_outside = 1
group by Personality;
"""

# Run the query and load the grouped result into a DataFrame
query_5 = bq_client.query(query).to_dataframe()
query_5.head()



Unnamed: 0,Personality,Outdoors_Time
0,Introvert,405
1,Extrovert,24


---

In [20]:
# Assess friend group sizes per personality type.
# NOTE: COUNT(Friends_circle_size) counts the rows with a non-NULL Friends_circle_size,
# so "Total_Friends" is the number of such rows per personality type, not a sum of friends.
# TODO(review): use SUM(Friends_circle_size) if a total number of friends is what is wanted.
# The table reference reuses dataset_id / table_id defined earlier in the notebook.
query = f"""
SELECT Personality, COUNT(Friends_circle_size) as Total_Friends
FROM `{dataset_id}.{table_id}`
group by Personality
"""

# Run the query and load the grouped result into a DataFrame
query_6 = bq_client.query(query).to_dataframe()
query_6.head()



Unnamed: 0,Personality,Total_Friends
0,Introvert,1372
1,Extrovert,1451


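Surprisingly enough, both introverts and extroverts (at least as far as the sample goes) have the same amounts of friends (which means they are NOT anti-social). Note, however, that `COUNT(Friends_circle_size)` counts the rows with a recorded friend-circle size, so the figures above are the number of respondents per personality type rather than a sum of their friends.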

In [21]:
# Assess the average friend group size per personality type
# (AVG of Friends_circle_size over the rows of each Personality group).
# The table reference reuses dataset_id / table_id defined earlier in the notebook.
query = f"""
SELECT Personality, AVG(Friends_circle_size) as Avg_Friend_Group_Size
FROM `{dataset_id}.{table_id}`
group by Personality
"""

# Run the query and load the grouped result into a DataFrame
query_7 = bq_client.query(query).to_dataframe()
query_7.head()



Unnamed: 0,Personality,Avg_Friend_Group_Size
0,Introvert,3.196793
1,Extrovert,9.173673


So whilst, in ***absolute*** terms, both personality types do have roughly the same amounts of friends, on average introverts tend to be more 'picky' and have smaller friend groups, although the stability of said friend groups cannot be determined from this dataset.

---