<a href="https://colab.research.google.com/github/IreneZhou0129/Google-Sheet-in-Colab/blob/main/Google_Sheet_%2B_Colab.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import json

import gspread
import pandas as pd
from google.auth import default
from google.colab import auth
from google.colab import drive

# Authenticate the current Colab user so Google APIs can be called on their behalf
auth.authenticate_user()
# default() returns a pair; only the first element (the credentials) is used here
creds, _ = default()

# gspread client used by the following cells to open spreadsheets
gc = gspread.authorize(creds)

## Retrieve the Google Sheet from Google Drive

In [2]:
# Open the spreadsheet by its title. In Drive the file lives at
# "drive/MyDrive/kaggle_sample.gsheet", but neither the full path nor the
# extension is needed here.
workbook = gc.open("kaggle_sample")
worksheet = workbook.worksheet("sentimentdataset")

# Every row of the tab as a list of lists (the header row is still row 0)
rows = worksheet.get_all_values()

# Load the raw rows into a DataFrame
df = pd.DataFrame.from_records(rows)

print(df.shape)
df.head()

(733, 13)


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12
0,Text,Sentiment,Timestamp,User,Platform,Hashtags,Retweets,Likes,Country,Year,Month,Day,Hour
1,Enjoying a beautiful day at the park!,Positive,2023-01-15 12:30:00,User123,Twitter,#Nature #Park,15.0,30.0,USA,2023,1,15,12
2,Traffic was terrible this morning.,Negative,2023-01-15 08:45:00,CommuterX,Twitter,#Traffic #Morning,5.0,10.0,Canada,2023,1,15,8
3,Just finished an amazing workout! 💪,Positive,2023-01-15 15:45:00,FitnessFan,Instagram,#Fitness #Workout,20.0,40.0,USA,2023,1,15,15
4,Excited about the upcoming weekend getaway!,Positive,2023-01-15 18:20:00,AdventureX,Facebook,#Travel #Adventure,8.0,15.0,UK,2023,1,15,18


In [3]:
# Rewrite the column names using the first row of the sheet.
#
# The frame is rebuilt from `rows` instead of being derived from `df`, so this
# cell is idempotent: re-running it can never promote a real data row to the
# header or silently drop another row.
if not rows:
  raise ValueError("The worksheet returned no rows; cannot determine column names.")

header, *records = rows
df = pd.DataFrame.from_records(records, columns=header)

# Keep 1-based row labels so each label still matches the row's position
# in the raw sheet data (row 0 was the header).
df.index = pd.RangeIndex(1, len(records) + 1)

df.head(5)

Unnamed: 0,Text,Sentiment,Timestamp,User,Platform,Hashtags,Retweets,Likes,Country,Year,Month,Day,Hour
1,Enjoying a beautiful day at the park!,Positive,2023-01-15 12:30:00,User123,Twitter,#Nature #Park,15.0,30.0,USA,2023,1,15,12
2,Traffic was terrible this morning.,Negative,2023-01-15 08:45:00,CommuterX,Twitter,#Traffic #Morning,5.0,10.0,Canada,2023,1,15,8
3,Just finished an amazing workout! 💪,Positive,2023-01-15 15:45:00,FitnessFan,Instagram,#Fitness #Workout,20.0,40.0,USA,2023,1,15,15
4,Excited about the upcoming weekend getaway!,Positive,2023-01-15 18:20:00,AdventureX,Facebook,#Travel #Adventure,8.0,15.0,UK,2023,1,15,18
5,Trying out a new recipe for dinner tonight.,Neutral,2023-01-15 19:55:00,ChefCook,Instagram,#Cooking #Food,12.0,25.0,Australia,2023,1,15,19


## (Optional) Process the dataset with other files in Google Drive

In [4]:
# Mount Google Drive first so the JSON file is reachable after a fresh
# runtime restart (Restart & Run All), not only when Drive was mounted by hand.
DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT)

# Read a JSON file.
# An absolute path avoids depending on the current working directory, and the
# explicit encoding avoids depending on the platform default when decoding.
with open(f"{DRIVE_MOUNT_POINT}/MyDrive/kaggle_sample.json", encoding="utf-8") as json_file:
  sample_dict = json.load(json_file)

sample_dict

{'usa_twitter_positive': 'apple',
 'canada_twitter_negative': 'banana',
 'uk_facebook_positive': 'pear'}

In [5]:
# Value used for rows whose key has no entry in sample_dict
DEFAULT_VALUE = 'Momo'

# Build one lookup key per row in a single vectorized pass:
# ('UK', 'Facebook', 'Positive') -> 'uk_facebook_positive'
lookup_keys = (
    df['Country']
    .str.cat([df['Platform'], df['Sentiment']], sep='_')
    .str.lower()
)

# dict.get performs the membership test and the lookup in one step
new_column_list = [sample_dict.get(key, DEFAULT_VALUE) for key in lookup_keys]

# Add the new column to df
df['new_column'] = new_column_list

df.head(5)

Unnamed: 0,Text,Sentiment,Timestamp,User,Platform,Hashtags,Retweets,Likes,Country,Year,Month,Day,Hour,new_column
1,Enjoying a beautiful day at the park!,Positive,2023-01-15 12:30:00,User123,Twitter,#Nature #Park,15.0,30.0,USA,2023,1,15,12,apple
2,Traffic was terrible this morning.,Negative,2023-01-15 08:45:00,CommuterX,Twitter,#Traffic #Morning,5.0,10.0,Canada,2023,1,15,8,banana
3,Just finished an amazing workout! 💪,Positive,2023-01-15 15:45:00,FitnessFan,Instagram,#Fitness #Workout,20.0,40.0,USA,2023,1,15,15,Momo
4,Excited about the upcoming weekend getaway!,Positive,2023-01-15 18:20:00,AdventureX,Facebook,#Travel #Adventure,8.0,15.0,UK,2023,1,15,18,pear
5,Trying out a new recipe for dinner tonight.,Neutral,2023-01-15 19:55:00,ChefCook,Instagram,#Cooking #Food,12.0,25.0,Australia,2023,1,15,19,Momo


## Update the original Google Sheet with the modified data

In [6]:
# Send the column names as the first row, followed by every data row
header_row = df.columns.tolist()
data_rows = df.values.tolist()
worksheet.update([header_row] + data_rows)

{'spreadsheetId': '1UVxYGmVHi4HWy1ImHn7Ge8YpuAfv3wOj-53zMQXZgkw',
 'updatedRange': 'sentimentdataset!A1:N733',
 'updatedRows': 733,
 'updatedColumns': 14,
 'updatedCells': 10262}