# Post Analysis

**This notebook analyzes the posts, reels and stories published during the communication campaign.**

## Import libraries

In [None]:
# Import standard libraries
import os

In [None]:
# Import libraries for data analysis and visualization
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.pyplot import figure

In [None]:
# Import internal modules
from src import plot
from src import utility
from src import features

## Load data

In [None]:
# Specify here the paths to your data and image folders.
# Both are resolved against the current working directory. os.path.join uses
# the platform separator and avoids a doubled slash when the working
# directory is the filesystem root.
DATA_PATH = os.path.join(os.getcwd(), 'data')
IMG_PATH = os.path.join(os.getcwd(), 'img')

In [None]:
# Specify here the files you want to use for this analysis.
# The three exports cover the same period, so the date range is declared once:
# change EXPORT_PERIOD to switch to another export, or replace a whole file
# name below if one of your files is named differently.
EXPORT_PERIOD = '2023-08-07_2023-09-15'
POST_FILE = f'instagram-posts_{EXPORT_PERIOD}.csv'
REEL_FILE = f'instagram-reels_{EXPORT_PERIOD}.csv'
STORY_FILE = f'instagram-stories_{EXPORT_PERIOD}.csv'

In [None]:
# Load each export into its own DataFrame.
# The reel export is read with date_col='Date', whereas the post and story
# exports are read with date_col='Timestamp'.
# NOTE(review): read_file is expected to use date_col as the index and to
# return rows sorted by date -- confirm in src/utility.
df1 = utility.read_file(
    POST_FILE,
    date_col='Timestamp',
    data_path=DATA_PATH,
)
df2 = utility.read_file(
    REEL_FILE,
    date_col='Date',
    data_path=DATA_PATH,
)
df3 = utility.read_file(
    STORY_FILE,
    date_col='Timestamp',
    data_path=DATA_PATH,
)

In [None]:
# Group the per-type DataFrames in a dictionary keyed by publication type,
# so that a single call can apply the same function to every dataset.
dfs_dict = {
    'post': df1,
    'reel': df2,
    'story': df3,
}

## Add features

In [None]:
# Assign a sequence number to each publication, separately for each dataset.
# The return value is not used; presumably the DataFrames in dfs_dict are
# modified in place -- confirm in src/features.
features.set_publication_numero(dfs_dict)

In [None]:
# Count the number of characters in the content of each publication.
# The return value is not used; presumably the DataFrames in dfs_dict are
# modified in place -- confirm in src/features.
features.count_content_length(dfs_dict)

In [None]:
# Compute the time elapsed between each publication and the previous one,
# separately for each dataset.
# The return value is not used; presumably the DataFrames in dfs_dict are
# modified in place -- confirm in src/features.
features.time_since_last_publication(dfs_dict)

## Process data

In [None]:
# Stack the three DataFrames row-wise into a single one, ordered by index.
# NOTE(review): the index is expected to be the publication date set when
# the files were read -- confirm in src/utility.
df = pd.concat([df1, df2, df3], axis=0).sort_index()

In [None]:
# Display the columns available in the combined DataFrame
df.columns

In [None]:
# Wrap the combined DataFrame in a dictionary so that it can be passed to the
# same functions as the per-type datasets.
df_dict = {'global': df}

In [None]:
# Assign a sequence number to each publication of the combined dataset
# (all publication types together).
features.set_publication_numero(df_dict)
# Compute the time elapsed between each publication and the previous one,
# regardless of publication type.
# NOTE(review): the combined DataFrame already carries the per-type values
# computed earlier; presumably these calls overwrite them -- confirm in src/features.
features.time_since_last_publication(df_dict)

## Explore

In [None]:
# Plot several graphs of the organic reach during the communication campaign,
# for each publication type (posts, reels, stories).
# Images are saved under the path given as save_img_path.
plot.plot_chain(dfs_dict, 'Reach (Organic)', save_img_path=f'{IMG_PATH}/Reach')

In [None]:
# Plot several graphs of the organic reach during the communication campaign,
# for the combined dataset (all publication types together).
# NOTE(review): this uses the same save_img_path as the per-type plots;
# check in src/plot that the generated files cannot overwrite each other.
plot.plot_chain(df_dict, 'Reach (Organic)', save_img_path=f'{IMG_PATH}/Reach')