In [2]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import os
import json

In [3]:
# Step 1: Define the directory containing your JSON files
json_dir = 'D://Study//Project//Minor Project//Categories//Non-profit//Brand Data'  # Update this path accordingly

# Step 2: List all JSON files in the directory.
# os.listdir returns entries in arbitrary order, so sort them to keep the
# row order of the concatenated frame reproducible between runs.
json_files = sorted(file for file in os.listdir(json_dir) if file.endswith('.json'))
if not json_files:
    # Fail with the offending path instead of pd.concat's generic
    # "No objects to concatenate" error.
    raise FileNotFoundError(f"No .json files found in {json_dir!r}")

# Step 3: Read each JSON file into a DataFrame and store them in a list
dfs = [pd.read_json(os.path.join(json_dir, file_name)) for file_name in json_files]

# Step 4: Concatenate the DataFrames in the list into one DataFrame with a fresh 0..n-1 index
brand_df = pd.concat(dfs, ignore_index=True)

In [4]:
# Peek at the first rows of brand_df's 'timestamp' column to see how it was parsed.
brand_df['timestamp'].head()

0   2024-02-27 07:42:58+00:00
1   2024-02-26 11:23:48+00:00
2   2024-02-26 11:32:40+00:00
3   2024-02-27 07:40:45+00:00
4   2024-02-26 10:40:03+00:00
Name: timestamp, dtype: datetime64[ns, UTC]

In [5]:
# List every column available in the concatenated brand frame.
brand_df.columns

Index(['inputUrl', 'id', 'type', 'shortCode', 'caption', 'hashtags',
       'mentions', 'url', 'commentsCount', 'firstComment', 'latestComments',
       'dimensionsHeight', 'dimensionsWidth', 'displayUrl', 'images',
       'likesCount', 'timestamp', 'childPosts', 'ownerFullName',
       'ownerUsername', 'ownerId', 'isSponsored', 'alt', 'videoUrl',
       'videoViewCount', 'videoPlayCount', 'productType', 'videoDuration',
       'musicInfo', 'taggedUsers', 'locationName', 'locationId',
       'coauthorProducers', 'isPinned', 'paidPartnership', 'sponsors',
       'username', 'fullName', 'biography', 'externalUrl',
       'externalUrlShimmed', 'followersCount', 'followsCount', 'hasChannel',
       'highlightReelCount', 'isBusinessAccount', 'joinedRecently',
       'businessCategoryName', 'private', 'verified', 'profilePicUrl',
       'profilePicUrlHD', 'facebookPage', 'igtvVideoCount', 'relatedProfiles',
       'latestIgtvVideos', 'postsCount', 'latestPosts'],
      dtype='object')

In [6]:
# Load the combined Non-profit comments export into a DataFrame.
comments_df = pd.read_json(r"D:\Study\Project\Minor Project\Categories\Non-profit\All_comments_combined_nonprofit.json")

In [7]:
# Preview the first rows of the comments frame.
comments_df.head()

Unnamed: 0,postUrl,id,text,ownerUsername,ownerProfilePicUrl,timestamp,likesCount
0,https://www.instagram.com/p/CUciC-QrN6P/,17920300183921040,‚ù§Ô∏èüíúüíô,aldenrmachado,https://scontent-iad3-1.cdninstagram.com/v/t51...,2021-09-30 15:28:08,0
1,https://www.instagram.com/p/CUciC-QrN6P/,17924187259878484,üëè,jan_sahyog_kendra,https://scontent-iad3-1.cdninstagram.com/v/t51...,2021-10-01 01:21:06,0
2,https://www.instagram.com/p/CnEjZvoIWE-/,17959809041214648,üôå‚ù§Ô∏è,kiran_gupta_26,https://instagram.fmnl4-1.fna.fbcdn.net/v/t51....,2023-01-06 14:49:07,0
3,https://www.instagram.com/p/CnEjZvoIWE-/,18070043713338176,I am from Gadchiroli so can I join with the go...,mamtajaunjalkar,https://instagram.fmnl4-1.fna.fbcdn.net/v/t51....,2023-01-07 15:02:45,0
4,https://www.instagram.com/p/CXAwt4WokhX/,17924857570960640,Great,divyajeevanngo,https://instagram.fdxb1-1.fna.fbcdn.net/v/t51....,2021-12-03 07:26:07,0


In [8]:
# List the columns of the comments frame.
comments_df.columns

Index(['postUrl', 'id', 'text', 'ownerUsername', 'ownerProfilePicUrl',
       'timestamp', 'likesCount'],
      dtype='object')

In [9]:
# Keep only the post URL and the timestamp of each comment row.
main_df = comments_df[['postUrl', 'timestamp']]

In [10]:
# Preview the two-column comment frame.
main_df.head()

Unnamed: 0,postUrl,timestamp
0,https://www.instagram.com/p/CUciC-QrN6P/,2021-09-30 15:28:08
1,https://www.instagram.com/p/CUciC-QrN6P/,2021-10-01 01:21:06
2,https://www.instagram.com/p/CnEjZvoIWE-/,2023-01-06 14:49:07
3,https://www.instagram.com/p/CnEjZvoIWE-/,2023-01-07 15:02:45
4,https://www.instagram.com/p/CXAwt4WokhX/,2021-12-03 07:26:07


In [11]:
# Rename 'timestamp' to 'comment_timestamp'; brand_df also has a 'timestamp'
# column, which is merged in by a later cell.
main_df = main_df.rename(columns={'timestamp': 'comment_timestamp'})

In [12]:
# Attach post-level fields from brand_df to every comment row by matching the
# comment's 'postUrl' against the post's 'url'. A left join keeps every comment
# row; comments without a matching post get missing values in the post fields.
# NOTE(review): assumes 'url' is unique in brand_df -- duplicate post rows would
# duplicate the matching comment rows; confirm.
post_columns = ['url', 'likesCount', 'timestamp', 'mentions', 'commentsCount', 'hashtags']
merged_df = (
    main_df
    .merge(brand_df[post_columns], how='left', left_on='postUrl', right_on='url')
    .drop(columns=['url'])  # 'url' duplicates 'postUrl' after the join
)

In [13]:
# From here on main_df refers to the merged comment + post frame.
main_df = merged_df

In [14]:
# Preview the merged frame.
main_df.head()

Unnamed: 0,postUrl,comment_timestamp,likesCount,timestamp,mentions,commentsCount,hashtags
0,https://www.instagram.com/p/CUciC-QrN6P/,2021-09-30 15:28:08,107.0,2021-09-30 12:29:35+00:00,"[anshugoonj, roopshadasguupta]",2.0,[Goonj]
1,https://www.instagram.com/p/CUciC-QrN6P/,2021-10-01 01:21:06,107.0,2021-09-30 12:29:35+00:00,"[anshugoonj, roopshadasguupta]",2.0,[Goonj]
2,https://www.instagram.com/p/CnEjZvoIWE-/,2023-01-06 14:49:07,193.0,2023-01-06 10:56:09+00:00,[jagriti.yatra],2.0,[]
3,https://www.instagram.com/p/CnEjZvoIWE-/,2023-01-07 15:02:45,193.0,2023-01-06 10:56:09+00:00,[jagriti.yatra],2.0,[]
4,https://www.instagram.com/p/CXAwt4WokhX/,2021-12-03 07:26:07,52.0,2021-12-03 07:13:44+00:00,"[amazonnews_in, repost.user, goonj, cashify]",2.0,"[DeliveringSmiles, Goonj, repost]"


In [15]:
# The 'timestamp' column merged in from brand_df is renamed to 'post_timestamp'
# so it reads clearly next to 'comment_timestamp'.
main_df = main_df.rename(columns={'timestamp': 'post_timestamp'})

In [16]:
# Save the merged frame as a JSON list of records with ISO-8601 dates.
# The path is relative, so the file lands in the current working directory.
main_df.to_json('Main_dataframe_nonprofit.json', orient='records', date_format='iso')

In [16]:
# Load a main-dataframe export from the Categories folder.
# NOTE(review): this reads 'Main_dataframe_shoes.json' although the previous cell
# writes 'Main_dataframe_nonprofit.json', and df_new is not referenced by the
# visible cells that follow except to preview it -- confirm this is intentional.
df_new = pd.read_json(r'D:\Study\Project\Minor Project\Categories\Main_dataframe_shoes.json')

In [17]:
# Show up to the first 100 rows of df_new.
# NOTE(review): a large preview bloats the saved notebook; a plain .head() may be enough.
df_new.head(100)

Unnamed: 0,postUrl,comment_timestamp,likesCount,post_timestamp,mentions
0,https://www.instagram.com/p/7Cv9hcJNvG/,2015-08-31T10:13:02.000Z,34,2015-08-31T09:36:51.000Z,"[whartever, repostapp]"
1,https://www.instagram.com/p/7Cw92ppNgg/,2015-08-31T09:51:34.000Z,42,2015-08-31T09:45:38.000Z,"[whartever, repostapp]"
2,https://www.instagram.com/p/7Cw92ppNgg/,2015-08-31T12:50:34.000Z,42,2015-08-31T09:45:38.000Z,"[whartever, repostapp]"
3,https://www.instagram.com/p/7Cw92ppNgg/,2015-08-31T17:33:26.000Z,42,2015-08-31T09:45:38.000Z,"[whartever, repostapp]"
4,https://www.instagram.com/p/Cxss3QBx6cO/,2023-09-27T15:40:16.000Z,359,2023-09-27T14:22:23.000Z,[nanditamiglani]
...,...,...,...,...,...
95,https://www.instagram.com/p/CzBtVmwJtv6/,2023-10-31T02:06:03.000Z,1844,2023-10-30T14:44:37.000Z,"[tattoozbyraghav, sandipan_zm]"
96,https://www.instagram.com/p/CzBtVmwJtv6/,2023-10-31T02:57:47.000Z,1844,2023-10-30T14:44:37.000Z,"[tattoozbyraghav, sandipan_zm]"
97,https://www.instagram.com/p/CzBtVmwJtv6/,2023-10-31T04:36:19.000Z,1844,2023-10-30T14:44:37.000Z,"[tattoozbyraghav, sandipan_zm]"
98,https://www.instagram.com/p/CzBtVmwJtv6/,2023-10-31T05:02:20.000Z,1844,2023-10-30T14:44:37.000Z,"[tattoozbyraghav, sandipan_zm]"


In [None]:
def get_total_commment_counts(timestamp, comments=None, window_days=7, post_url=None):
    """Count comments whose timestamp falls in a window starting at ``timestamp``.

    The window is ``[timestamp, timestamp + window_days days]`` with both ends
    inclusive. By default every row of the comment frame is considered,
    regardless of which post it belongs to.

    Parameters
    ----------
    timestamp : pandas.Timestamp
        Start of the window. A missing value (NaT/None) yields 0.
    comments : pandas.DataFrame, optional
        Frame with a 'comment_timestamp' column (and 'postUrl' when
        ``post_url`` is used). Defaults to the notebook-level ``main_df``.
    window_days : int or float, optional
        Length of the window in days. Defaults to 7.
    post_url : str, optional
        When given, only comments whose 'postUrl' equals this value are counted.

    Returns
    -------
    int
        Number of matching comment rows.

    Notes
    -----
    ``timestamp`` and the 'comment_timestamp' column must both be tz-aware or
    both be tz-naive; pandas raises ``TypeError`` when they are mixed.
    """
    if comments is None:
        # Fall back to the notebook's merged frame, resolved at call time.
        comments = main_df
    if pd.isna(timestamp):
        # Comparisons against NaT are always False, so nothing can match.
        return 0

    window_end = timestamp + pd.to_timedelta(window_days, unit='D')
    in_window = comments['comment_timestamp'].between(timestamp, window_end)
    if post_url is not None:
        in_window &= comments['postUrl'] == post_url
    return int(in_window.sum())

In [None]:
all_mentions = []
all_comment_count = []

# Convert both timestamp columns to tz-aware UTC datetimes so they can be
# compared with each other inside get_total_commment_counts.
main_df['post_timestamp'] = pd.to_datetime(main_df['post_timestamp'], utc=True)
main_df['comment_timestamp'] = pd.to_datetime(main_df['comment_timestamp'], utc=True)

# main_df has one row per comment, so a post's timestamp repeats once per
# comment. The count depends only on that timestamp, so compute it once per
# distinct value instead of re-filtering the whole frame for every row.
counts_by_post_time = {}

for timestamp, mentions in zip(main_df['post_timestamp'], main_df['mentions']):
    # Comments with no matching post come out of the left merge with a missing
    # 'mentions' value (NaN), which is not iterable; skip those rows, and rows
    # with an empty mention list, which contribute nothing.
    if not isinstance(mentions, (list, tuple)) or not mentions:
        continue
    if timestamp not in counts_by_post_time:
        counts_by_post_time[timestamp] = get_total_commment_counts(timestamp)
    no_of_comments = counts_by_post_time[timestamp]
    # One (mention, count) pair per mentioned account, kept in parallel lists.
    all_mentions.extend(mentions)
    all_comment_count.extend([no_of_comments] * len(mentions))
    

In [None]:
# Number of collected mention entries.
len(all_mentions)

In [None]:
# Number of collected count entries; should equal len(all_mentions).
len(all_comment_count)

In [None]:
# Create DataFrame with two columns: one row per collected (mention, count) pair
df = pd.DataFrame({'Influencers': all_mentions, '7D_Comment_Counts': all_comment_count})
# Collapse to one row per mentioned account, averaging its collected counts
inf_df_unique = df.groupby('Influencers', as_index=False)['7D_Comment_Counts'].mean()
# main_df in this notebook is built from the Non-profit data, so export under
# that category's name; the previous 'beverages' filename mislabelled the
# result and could overwrite another category's output.
inf_df_unique.to_excel('7D_comment_count_nonprofit.xlsx', index=False)

In [None]:
# pandas is already imported in the notebook's first cell; this repeat import is
# harmless and only matters if this cell is run on its own in a fresh kernel.
import pandas as pd
# Load the food-category main dataframe from its CSV export.
new_df = pd.read_csv(r'D:\Study\Project\Minor Project\Categories\Main_dataframe_food.csv')

In [None]:
# Preview the CSV-loaded frame.
new_df.head()

In [None]:
# Re-export the CSV-loaded frame as JSON records with ISO-formatted dates.
# 'BB.json' is a relative path, so it is written to the current working directory.
new_df.to_json('BB.json', orient='records',  date_format='iso')

In [None]:
# Read BB.json back from the Categories folder.
# NOTE(review): the preceding cell writes 'BB.json' relative to the working
# directory; this absolute path only finds that file if the notebook runs from
# the Categories folder -- confirm.
new_json_df = pd.read_json(r'D:\Study\Project\Minor Project\Categories\BB.json')

In [None]:
# Preview the frame loaded back from JSON.
new_json_df.head()