In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np

#### Loading the Facebook Metrics Dataset

In [5]:
# Load the dataset

fb_data = pd.read_csv('dataset_facebook.csv')
print(fb_data)
# Display the first few rows
print("Facebook Metrics Dataset Shape:", fb_data.shape)
fb_data.head()

     pagetotallikes  type  category  postmonth  postweekday  posthour  paid  \
0            139441     2         2         12            4         3     0   
1            139441     3         2         12            3        10     0   
2            139441     2         3         12            3         3     0   
3            139441     2         2         12            2        10     1   
4            139441     2         2         12            2         3     0   
..              ...   ...       ...        ...          ...       ...   ...   
367           85093     2         3          1            7        10     0   
368           85093     2         3          1            7         2     0   
369           81370     2         2          1            5         8     0   
370           81370     2         1          1            5         2     0   
371           81370     2         3          1            4        11     0   

     lifetimeposttotalreach  lifetimeposttotalimpre

Unnamed: 0,pagetotallikes,type,category,postmonth,postweekday,posthour,paid,lifetimeposttotalreach,lifetimeposttotalimpressions,lifetimeengagedusers,lifetimepostconsumers,lifetimepostconsumptions,lifetimepostimpressionsbypeoplewhohavelikedyourpage,lifetimepostreachbypeoplewholikeyourpage,lifetimepeoplewhohavelikedyourpageandengagedwithyourpost,comment,like,share,totalinteractions
0,139441,2,2,12,4,3,0,2752,5091,178,109,159,3078,1640,119,4,79,17,100
1,139441,3,2,12,3,10,0,10460,19057,1457,1361,1674,11710,6112,1108,5,130,29,164
2,139441,2,3,12,3,3,0,2413,4373,177,113,154,2812,1503,132,0,66,14,80
3,139441,2,2,12,2,10,1,50128,87991,2211,790,1119,61027,32048,1386,58,1572,147,1777
4,139441,2,2,12,2,3,0,7244,13594,671,410,580,6228,3200,396,19,325,49,393


#### a. Create Data Subsets

In [7]:
# 1. Create a subset based on column selection
subset_columns = fb_data[['comment', 'like', 'share']]
print("Subset by columns shape:", subset_columns.shape)
subset_columns.head()

Subset by columns shape: (372, 3)


Unnamed: 0,comment,like,share
0,4,79,17
1,5,130,29
2,0,66,14
3,58,1572,147
4,19,325,49


In [9]:
# 2. Create a subset based on row filtering
subset_high_likes = fb_data[fb_data['like'] > 200]
print("Subset by high likes count:", subset_high_likes.shape)
subset_high_likes.head()

Subset by high likes count: (75, 19)


Unnamed: 0,pagetotallikes,type,category,postmonth,postweekday,posthour,paid,lifetimeposttotalreach,lifetimeposttotalimpressions,lifetimeengagedusers,lifetimepostconsumers,lifetimepostconsumptions,lifetimepostimpressionsbypeoplewhohavelikedyourpage,lifetimepostreachbypeoplewholikeyourpage,lifetimepeoplewhohavelikedyourpageandengagedwithyourpost,comment,like,share,totalinteractions
3,139441,2,2,12,2,10,1,50128,87991,2211,790,1119,61027,32048,1386,58,1572,147,1777
4,139441,2,2,12,2,3,0,7244,13594,671,410,580,6228,3200,396,19,325,49,393
6,139441,2,3,12,1,3,1,11692,19479,481,265,364,15432,9328,379,3,249,27,279
7,139441,2,3,12,7,9,1,13720,24137,537,232,305,19728,11056,422,0,325,14,339
10,139441,3,2,12,5,10,0,21744,42334,4258,4100,4540,37849,18952,3798,0,233,19,252


In [11]:
# 3. Create a subset using iloc (integer-position based indexing)
subset_iloc = fb_data.iloc[10:20, 2:6]
print("Subset using iloc shape:", subset_iloc.shape)
subset_iloc

Subset using iloc shape: (10, 4)


Unnamed: 0,category,postmonth,postweekday,posthour
10,2,12,5,10
11,2,12,5,10
12,2,12,5,10
13,2,12,5,3
14,2,12,4,5
15,2,12,3,10
16,3,12,3,3
17,1,12,2,12
18,3,12,2,3
19,3,12,1,11


In [13]:
# 4. Create a subset using loc (label-based indexing)
fb_data_reset = fb_data.reset_index()
subset_loc = fb_data_reset.loc[5:15, ['comment', 'like']]
print("Subset using loc shape:", subset_loc.shape)
subset_loc

Subset using loc shape: (11, 2)


Unnamed: 0,comment,like
5,1,152
6,3,249
7,0,325
8,0,161
9,3,113
10,0,233
11,0,88
12,0,90
13,5,137
14,2,577


#### b. Merge Data

In [15]:
# Create two dataframes to demonstrate merging
# First dataframe
df1 = fb_data[["like"]].copy()
df1.reset_index(inplace=True)

# Second dataframe
df2 = fb_data[["share"]].copy()
df2.reset_index(inplace=True)

print("DataFrame 1 shape:", df1.shape)
print("DataFrame 2 shape:", df2.shape)

# Show sample of both dataframes
print("\nDataFrame 1 sample:")
display(df1.head(3))
print("\nDataFrame 2 sample:")
display(df2.head(3))

DataFrame 1 shape: (372, 2)
DataFrame 2 shape: (372, 2)

DataFrame 1 sample:


Unnamed: 0,index,like
0,0,79
1,1,130
2,2,66



DataFrame 2 sample:


Unnamed: 0,index,share
0,0,17
1,1,29
2,2,14


In [17]:
# 1. Inner join (only keep rows found in both dataframes)
merged_inner = pd.merge(df1, df2, how='inner', suffixes=('_profile', '_performance'))
print("Inner join shape:", merged_inner.shape)
merged_inner.head()

Inner join shape: (372, 3)


Unnamed: 0,index,like,share
0,0,79,17
1,1,130,29
2,2,66,14
3,3,1572,147
4,4,325,49


In [19]:
# 2. Left join (keep all rows from df1)
merged_left = pd.merge(df1, df2, how='left', suffixes=('_profile', '_performance'))
print("Left join shape:", merged_left.shape)
merged_left.head()

Left join shape: (372, 3)


Unnamed: 0,index,like,share
0,0,79,17
1,1,130,29
2,2,66,14
3,3,1572,147
4,4,325,49


In [21]:
# 3. Right join (keep all rows from df2)
merged_right = pd.merge(df1, df2, how='right', suffixes=('_profile', '_performance'))
print("Right join shape:", merged_right.shape)
merged_right.head()

Right join shape: (372, 3)


Unnamed: 0,index,like,share
0,0,79,17
1,1,130,29
2,2,66,14
3,3,1572,147
4,4,325,49


In [23]:
# 4. Outer join (keep all rows from both dataframes)
merged_outer = pd.merge(df1, df2, how='outer', suffixes=('_profile', '_performance'))
print("Outer join shape:", merged_outer.shape)
merged_outer.head()

Outer join shape: (372, 3)


Unnamed: 0,index,like,share
0,0,79,17
1,1,130,29
2,2,66,14
3,3,1572,147
4,4,325,49


#### c. Sort Data

In [26]:
# 1. Sort by a single column (ascending)
sorted_likes = fb_data.sort_values(by='like')
print("Sorted by Page total likes (ascending):")
sorted_likes[['like']].head(30)

Sorted by Page total likes (ascending):


Unnamed: 0,like
76,0
290,0
314,0
21,0
100,0
120,1
301,2
295,2
302,3
121,3


In [28]:
# 2. Sort by a single column (descending)
sorted_likes_desc = fb_data.sort_values(by='share', ascending=False)
print("Sorted by Page total likes (descending):")
sorted_likes_desc[['share']].head(30)

Sorted by Page total likes (descending):


Unnamed: 0,share
333,181
3,147
105,139
252,128
244,123
222,122
264,109
353,99
159,98
315,97


#### d. Transposing Data

In [31]:
# 1. Create a small subset for demonstration
small_subset = fb_data.iloc[:5, :5]
print("Original data shape:", small_subset.shape)
display(small_subset)

Original data shape: (5, 5)


Unnamed: 0,pagetotallikes,type,category,postmonth,postweekday
0,139441,2,2,12,4
1,139441,3,2,12,3
2,139441,2,3,12,3
3,139441,2,2,12,2
4,139441,2,2,12,2


In [33]:
# 2. Transpose the data (rows become columns and vice versa)
transposed_data = small_subset.T
print("Transposed data shape:", transposed_data.shape)
display(transposed_data)

Transposed data shape: (5, 5)


Unnamed: 0,0,1,2,3,4
pagetotallikes,139441,139441,139441,139441,139441
type,2,3,2,2,2
category,2,2,3,2,2
postmonth,12,12,12,12,12
postweekday,4,3,3,2,2


#### e. Shape and Reshape Data

In [36]:
# 1. Check the current shape of the dataset
print("Original dataset shape:", fb_data.shape)

# 2. Extract numeric columns for reshaping operations
numeric_cols = fb_data.select_dtypes(include=['float64', 'int64']).columns
numeric_data = fb_data[numeric_cols]
print("Numeric data shape:", numeric_data.shape)
print("Numeric columns:", list(numeric_cols)[:5], "...")

Original dataset shape: (372, 19)
Numeric data shape: (372, 19)
Numeric columns: ['pagetotallikes', 'type', 'category', 'postmonth', 'postweekday'] ...


In [38]:
# 3. Convert the dataframe to numpy array for reshaping
array_data = numeric_data.iloc[:100, :10].values 
print("Original array shape:", array_data.shape)

Original array shape: (100, 10)


In [40]:
# 4. Reshape to 1D array
reshaped_1d = array_data.reshape(-1) 
print("1D array shape:", reshaped_1d.shape)
print("First 10 elements:", reshaped_1d[:10])

1D array shape: (1000,)
First 10 elements: [139441      2      2     12      4      3      0   2752   5091    178]


In [42]:
# 5. Reshape to different 2D shapes
# Calculate the total number of elements
total_elements = array_data.size
print(f"Total elements: {total_elements}")

# Reshape to (20, total_elements/20)
cols = total_elements // 20
reshaped_2d = array_data.reshape(20, cols)
print("Reshaped to 2D array with shape:", reshaped_2d.shape)
print("Sample of reshaped data:")
print(reshaped_2d[:3, :5]) 

Total elements: 1000
Reshaped to 2D array with shape: (20, 50)
Sample of reshaped data:
[[139441      2      2     12      4]
 [139441      3      2     12      1]
 [139441      3      2     12      5]]
