In [0]:
%pip install -U emojis


In [0]:
from pyspark.sql import functions as F
from pyspark.sql.functions import asc,desc
from pyspark.sql.window import Window
from pyspark.sql import Row
import emojis
import re
from string import punctuation 
import nltk
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('vader_lexicon')
from nltk.corpus import stopwords
from nltk.sentiment.vader import SentimentIntensityAnalyzer
import operator
from pyspark.sql import types 


In [0]:
def convertSentiment (text):
  '''
  PROMISES: converts the sentiment col of original dataset to -1,0,1 to reflect neg,neu,pos
  REQUIRES: text is astring
  '''
  if(text=='positive'):
    return int(1)
  elif(text=='negative'):
    return int(-1)
  else:
    return int(0)

In [0]:
df=spark.read.csv("/FileStore/tables/dataset_Popularity_Prediction_of_Instagram_Posts_CartaEtAl-2.csv",inferSchema=True,header=True,multiLine=True,escape='"')
##conveting timestamp column to time type schema
df=df.withColumn('timestamp2',F.to_timestamp('timestamp'))
df=df.drop('timestamp','url','location','is_verified','img_urls','[0, 4]','[4, 8]','[8, 12]','[12, 16]','[16, 20]','[20, 24]','sentiment_score')
df=df.withColumnRenamed('timestamp2','timestamp')
df=df.withColumnRenamed("Unnamed: 0","num")
df=df.withColumnRenamed("like_prepost","PrevPost_1")
df=df.withColumnRenamed("like_pprepost","PrevPost_2")
df=df.withColumnRenamed("like_ppprepost","PrevPost_3")
df=df.withColumnRenamed("like_pppprepost\r","PrevPost_4")
dfRdd=df.select('timestamp','sentiment').rdd.map(list)
dfRdd=dfRdd.map(lambda x: Row(timestamp=x[0],sentiment_score=convertSentiment(x[1])))
tempDf=spark.createDataFrame(dfRdd)
df=df.join(tempDf,on=['timestamp'],how='inner')
df=df.drop('sentiment')
df.printSchema()
df.count()

In [0]:
def createDf(path):
  '''
  PROMISES: create new dataframe for a new instagram user that contains the following fields author, caption, likes, is_video, timestamp
  REQUIRES: path is the path pf the JSON file
  '''
  newUsers=spark.read.json(path,multiLine=True)
  newUsers.createOrReplaceTempView('newuser')
  df1=spark.sql("SELECT explode(GraphImages) As col ,GraphProfileInfo.username As author,GraphProfileInfo.info.followers_count  As num_follower FROM newuser")
  df1.createOrReplaceTempView('newuser')
  df2=spark.sql("SELECT row_number() over (order by col.taken_at_timestamp) as num,author,col.edge_media_to_caption.edges.node.text As caption,col.is_video As is_video,col.edge_media_preview_like.count As likes,col.taken_at_timestamp As timestamp,num_follower FROM newuser")
  return df2


In [0]:

path=["/FileStore/tables/ENSF_612/Project/cassandra_daher.json",
     "/FileStore/tables/ENSF_612/Project/cookblob.json",
     "/FileStore/tables/ENSF_612/Project/duskosremac_repyyc.json",
     "/FileStore/tables/ENSF_612/Project/hmaefood.json",
     "/FileStore/tables/ENSF_612/Project/itzikss.json",
     "/FileStore/tables/ENSF_612/Project/jacoblaster89kr.json",
     "/FileStore/tables/ENSF_612/Project/naomipela.json",
     "/FileStore/tables/ENSF_612/Project/nicolerubanovich.json",
     "/FileStore/tables/ENSF_612/Project/shteifanie.json",
       "/FileStore/tables/ENSF_612/Project/tacdog4242.json"
     ]
newDataDf=[]
for i in path:
  newDataDf.append(createDf(i))
newUserDf=newDataDf[0]
for i in range (len(newDataDf)):
  if (i+1)<len(newDataDf):
    newUserDf=newUserDf.union(newDataDf[i+1])
##caption column is an array-> extract first element
newUserDf=newUserDf.withColumn("caption2",newUserDf["caption"].getItem(0))
newUserDf=newUserDf.drop("caption")
newUserDf=newUserDf.withColumnRenamed("caption2","caption")
##casting timestamp column to timestamp type
newUserDf=newUserDf.withColumn('timestamp2',F.to_timestamp('timestamp'))
newUserDf=newUserDf.drop('timestamp')
newUserDf=newUserDf.withColumnRenamed('timestamp2','timestamp')
newUserDf.printSchema()

In [0]:
print("New IG data count is: ",newUserDf.count())

# Feature Matrix and Target Vector Steps:
- Find LMA per user then create target vector (popular if likes of a post> (1+tolerance)* LMA) Note: they used 3 differnt K values and 3 differnt teloernce values  ``csv file already has a bunch of rolling average columns mean 5, 20 ...
- Create new features that has average likes achieved by the K most recent posts `` K ∈ {5, 10, 15, 20, 30, 50}``
- Create new features for time, day ,week ,month, and season (extracted from Time Stamp column)
- Creat 5 features for number of likes of prev 5 posts
- Extract number of words ,number of users tagged ,number of hashtags ,and Sentiment from caption column 
- 10 categories of emoji used in a caption (either 1 or 0 )`happiness, love, sadness, travel, food, pet, angry, music, party and sport` Note: i have found  Unicode emoji csv file
- 10 categories of hashtags (each cell will have either 0 or 1), corresponding to 10 different levels of hashtag popularity the hastag popularity is simply the 5 highest hashtags used and the least 5 hashtags used in all of the data set

In [0]:
##Note we need to extract at least 100 post per user if we are using K=50

def LMA(K,df):
  '''
  PROMISES: calculates the LMA over a specified K value
  REQUIRES: K size of rolling average, df the dataframe 
  '''
  s="mean_"+str(K)
  w = Window.partitionBy("author").orderBy('num').rowsBetween(-K, 0)
  df = df.withColumn(s, F.avg('likes').over(w))
  return df


In [0]:
def dateExtraction (df):
  '''
  PROMISES: extrats Time,day of week, month, year, season from time stamp column
  REQUIRES: df dataFrame
  '''
  df=df.withColumn('Time',(F.concat(F.hour('timestamp'),F.lit(":"),F.minute('timestamp'))))
  df=df.withColumn('DayOfWeek',F.dayofweek('timestamp'))
  df=df.withColumn('DayOfYear',F.dayofyear('timestamp'))
  df=df.withColumn('Month',F.month('timestamp'))
  df=df.withColumn('Year',F.year('timestamp'))
  rdd_season=df.select('timestamp','dayOfYear').rdd.map(list)
  rdd_season=rdd_season.map(lambda x: Row(timestamp=x[0], Season=findSeason(x[1])))
  season_df=spark.createDataFrame(rdd_season)
  df=df.join(season_df,on=['timestamp'],how='inner')
  df=df.drop('DayOfYear')

  return df
  
def findSeason (doy):
  '''
  PROMISES: finds the season to which day of year belongs to
  REQUIRES: doy day of year parameter
  '''
  spring = range(80, 172)
  summer = range(172, 264)
  fall = range(264, 355)
  if doy in spring:
    return str('Spring')
  elif doy in summer:
    return str('Summer')
  elif doy in fall:
    return str('Fall')
  else :
    return str('Winter')

In [0]:
def numberOfLikes(K,df):
  '''
  PROMISES: Finds exact number of likes of the latest published posts
  REQUIRES: postNum is the previous post number 
  '''
  s="PrevPost_"+str(K)
  w = Window.partitionBy("author").orderBy('num').rowsBetween(-K, -K)
  df = df.withColumn(s, F.sum('likes').over(w))
  return df

In [0]:

my_punctuation=punctuation.replace("'",'')
stop=stopwords.words("english")
analyzer = SentimentIntensityAnalyzer()
def getEmoji(text,emojiList):
  '''
  PROMISES: finds whether text contains an emoji from emojiList
  REQUIRES: text a string, and emojiList array of all emojis
  '''
  if(text==None or len(text)==0):
    return int(0)
  f=emojis.get(text)
  while(len(f)>0):
    x=f.pop()
    if emojis.decode(x) in emojiList:
      return int(1)
  return int(0)

  
def numHashtags(text):
  
  '''
  PROMISES: finds number of hashtags in a text
  REQUIRES: text a string
  '''
  if(text==None or len(text)==0):
    return int(0)
  tag= re.findall(r"#(\w+)", text)
  return len(tag)

def numUsersTag(text):
  '''
  PROMISES: finds number of users tagged in a string
  REQUIRES: text is a string
  '''
  if(text==None or len(text)==0):
    return int(0)
  tag= re.findall(r"@(\w+)", text)
  return len(tag)
def wordCount(text):
  '''
  PROMISES: finds word count in a text
  REQUIRES: text is a string
  '''
  
  if(text==None or len(text)==0):
    return int(0)
  emoj=emojis.get(text)
  if(len(emoj)==len(text)):
    return int(0)
  tag= re.findall(r"#(\w+)", text)
  userTags= re.findall(r"@(\w+)", text)
  text=text.translate(str.maketrans('', '', my_punctuation))##translate after u get the tags
  words=text.split()

  for word in words:
    if (word in tag) or (word in userTags) or (word in emoj) or (word in stop):
      words.remove(word)
  return int(len(words))
def generateHashtagDict(text):
  '''
  PROMISES: fills the hashtag dictionary
  REQUIRES: text is a string
  '''
  if(text==None or len(text)==0):
    return None
  if ('#' in text):
    tags= re.findall(r"#(\w+)", text)
    return tags
  return None
  

    
def getSentiment(text):
  '''
  PROMISES: finds the polarity or sentiment of a text
  REQUIRES: text to be a string
  '''
  if(text==None or len(text)==0):
    return int(0)
  emoj=emojis.get(text)
  for i in emoj:
    text=text.replace(i,"")
  polarity=analyzer.polarity_scores(text)
  neg=polarity['neg']
  pos=polarity['pos']
  neu=polarity['neu']
  if(neg> pos and neg> neu):
    return int(-1)
  elif(pos>neg and pos>neu):
    return int(1)
  else:
    return int(0)
def checkHashtagInText(text,word):
 
  '''
   PROMISES: checks wither a test contains the hashtag word
   REQUIRES: word is a string that contains a hashtag to be found in text
  '''
  if(text==None or len(text)==0):
    return int(0) 
    if ('#' in text):
      tags= re.findall(r"#(\w+)", text)
      for tag in tags:
        if(tag==word):
          return int(1)
      return int(0)
  else:
    return int(0)
        
  
  
    
  

In [0]:
#Creating new Features from timestamp
df=dateExtraction(df)
newUserDf=dateExtraction(newUserDf)
#Creating mean_5..50 for finalDf only since df already has these features in
newUserDf=LMA(5,newUserDf)
newUserDf=LMA(10,newUserDf)
newUserDf=LMA(15,newUserDf)
newUserDf=LMA(20,newUserDf)
newUserDf=LMA(30,newUserDf)
newUserDf=LMA(50,newUserDf)
#Creating features of prev post likes Only for finalDf, df already has these columns
newUserDf=numberOfLikes(1,newUserDf)
newUserDf=numberOfLikes(2,newUserDf)
newUserDf=numberOfLikes(3,newUserDf)
newUserDf=numberOfLikes(4,newUserDf)
# following the research paper they select 50 posts
newUserDf=newUserDf.filter(F.col('num')>=50)
newUserDf.cache()
##Creating tempDf that contains TextFeatur for the new extracted Data (Orginal data alread has these features)
newUserRdd=newUserDf.select("timestamp","caption").rdd.map(list)
newUserRdd=newUserRdd.map(lambda x: Row(timestamp=x[0],num_words=wordCount(x[1]),sentiment_score=getSentiment(x[1]),hashtags_count=numHashtags(x[1]),users_tagged=numUsersTag(x[1])))
newUserRdd.take(2)
newUserTempDf=spark.createDataFrame(newUserRdd)
newUserDf=newUserDf.join(newUserTempDf,on=['timestamp'],how='inner')#hoinning temp df with the main df for the new extracted data

##Rearranging columns for the original datasetDF and for the newly extracted df
newUserDf=newUserDf.select('timestamp','num','author','num_follower','is_video','caption','likes','Time','DayOfWeek','Month','Year','Season','mean_5','mean_10','mean_15','mean_20','mean_30','mean_50','PrevPost_1','PrevPost_2','PrevPost_3','PrevPost_4','num_words','sentiment_score','hashtags_count','users_tagged')
df=df.select('timestamp','num','author','num_follower','is_video','caption','likes','Time','DayOfWeek','Month','Year','Season','mean_5','mean_10','mean_15','mean_20','mean_30','mean_50','PrevPost_1','PrevPost_2','PrevPost_3','PrevPost_4','num_words','sentiment_score','hashtags_count','users_tagged')
#combining the dfs
finalDf=df.union(newUserDf)
finalDf=finalDf.dropDuplicates(["timestamp",'caption'])
finalDf.cache()

In [0]:
#Creating Hashtags Features
##Freq calculation of hashtags via reduce by key
hashtagRdd=finalDf.select('caption').rdd.map(list)
hashtagRdd=hashtagRdd.map(lambda x: generateHashtagDict(x[0]))
hashtagRdd=hashtagRdd.filter(lambda x: x is not None)
hashtagRdd=hashtagRdd.flatMap(list)
hashtagRdd=hashtagRdd.map(lambda x: (x,1))
hashtagRdd=hashtagRdd.reduceByKey(lambda key,val: key+val)
hashtagRdd=hashtagRdd.map(lambda x: (x[1],x[0]))
hashtagRdd=hashtagRdd.sortByKey(ascending=True)
leastFive=hashtagRdd.take(5)
hashtagRdd=hashtagRdd.sortByKey(ascending=False)
topFive=hashtagRdd.take(5)

##extracting the hashtags from top/least five list generated from prev step
hashtagsLeast=[]
hashtagsMost=[]
for key,val in leastFive:
  hashtagsLeast.append(val)
for key,val in topFive:
  hashtagsMost.append(val)

#Checking each caption if it contains any of the top5/ least 5 hashtags
hashtagFeatureRdd=finalDf.select('timestamp','caption').rdd.map(list)
hashtagFeatureRdd=hashtagFeatureRdd.map(lambda x: Row(timestamp=x[0],top1Tag=checkHashtagInText(x[1],hashtagsMost[0]),top2Tag=checkHashtagInText(x[1],hashtagsMost[1]),top3Tag=checkHashtagInText(x[1],hashtagsMost[2]),top4Tag=checkHashtagInText(x[1],hashtagsMost[3]),top5Tag=checkHashtagInText(x[1],hashtagsMost[4]),least1Tag=checkHashtagInText(x[1],hashtagsLeast[0]),least2Tag=checkHashtagInText(x[1],hashtagsLeast[1]),least3Tag=checkHashtagInText(x[1],hashtagsLeast[2]),least4Tag=checkHashtagInText(x[1],hashtagsLeast[3]),least5Tag=checkHashtagInText(x[1],hashtagsLeast[4])))
##merging dataframes
hashtagFeatureDf=spark.createDataFrame(hashtagFeatureRdd)
finalDf=finalDf.join(hashtagFeatureDf,on=['timestamp'],how='inner')
finalDf.cache()


In [0]:
## Emoji List
happyList=[":smile:",":simple_smile:",":laughing:",":smiley:",":smirk:",":satisfied:",":stuck_out_tongue_winking_eye:",":stuck_out_tongue_closed_eyes:",":stuck_out_tongue:",":smiley_cat:",":smile_cat:",":joy:",":joy_cat:"]
loveList=[":blush:",":heart_eyes:",":kissing_heart:",":kissing_closed_eyes:",":flushed:",":kissing:",":kissing_smiling_eyes:",":yellow_heart:",":blue_heart:",":purple_heart:",":heart:",":green_heart:",":heartbeat:",":heartpulse:",":two_hearts:",":revolving_hearts:",":cupid:",":sparkling_heart:",":couple:",":family:",":couplekiss:",":couple_with_heart:",":kiss:",":love_letter:",":gift_heart:"]
sadnessList=[":worried:",":frowning:",":anguished:",":expressionless:",":unamused:",":disappointed_relieved:",":pensive:",":disappointed:",":cry:",":sob:",":broken_heart:",":-1:",":thumbsdown:",":crying_cat_face:"]
travelList=[":city_sunrise:",":city_sunset:",":japanese_castle:",":european_castle:",":tokyo_tower:",":mount_fuji:",":sunrise_over_mountains:",":sunrise:",":statue_of_liberty:",":ship:",":airplane:",":helicopter:",":mountain_railway:",":train2:",":bullettrain_front:",":bullettrain_side:",":light_rail:",":monorail:",":jp:",":kr:",":cn:",":us:",":fr:",":es:",":it:",":ru:",":gb:",":uk:",":de:",":earth_africa:",":earth_americas:",":earth_asia:",":palm_tree:"]
foodList=[":yum:",":fork_and_knife:",":pizza:",":hamburger:",":fries:",":poultry_leg:",":meat_on_bone:",":spaghetti:",":curry:",":fried_shrimp:",":bento:",":sushi:",":fish_cake:",":rice_ball:",":rice_cracker:",":rice:",":ramen:",":stew:",":oden:",":dango:",":egg:",":bread:",":doughnut:",":custard:",":icecream:",":ice_cream:",":shaved_ice:",":birthday:",":cake:",":cookie:",":chocolate_bar:",":candy:",":lollipop:",":honey_pot:",":apple:",":green_apple:",":tangerine:",":lemon:",":cherries:",":grapes:",":watermelon:",":strawberry:",":peach:",":melon:",":banana:",":pear:",":pineapple:",":sweet_potato:",":eggplant:",":tomato:",":corn:"]
petList=[":cat:",":dog:",":mouse:",":hamster:",":rabbit:",":wolf:",":frog:",":tiger:",":koala:",":bear:",":pig:",":pig_nose:",":cow:",":boar:",":monkey_face:",":monkey:",":horse:",":racehorse:",":camel:",":sheep:",":elephant:",":panda_face:",":snake:",":bird:",":baby_chick:",":hatched_chick:",":hatching_chick:",":chicken:",":penguin:",":turtle:",":bug:",":honeybee:",":ant:",":beetle:",":snail:",":octopus:",":tropical_fish:",":fish:",":whale:",":whale2:",":dolphin:",":cow2:",":ram:",":rat:",":water_buffalo:",":tiger2:",":rabbit2:",":dragon:",":goat:",":rooster:",":dog2:",":pig2:",":mouse2:",":ox:",":dragon_face:",":blowfish:",":crocodile:",":dromedary_camel:",":leopard:",":cat2:",":poodle:"]
angryList=[":angry:",":rage:",":triumph:",":imp:",":anger:",":punch:",":facepunch:"]
musicList=[":notes:",":musical_note:",":sound:",":speaker:",":loudspeaker:",":mega:",":musical_score:",":musical_keyboard:",":violin:",":microphone:",":headphones:",":trumpet:",":saxophone:",":guitar:"]
partyList=[":collision:",":boom:",":metal:",":tada:",":confetti_ball:",":balloon:",":beer:",":beers:",":wine_glass:",":cocktail:",":tropical_drink:",":partying_face:"]
sportList=[":football:",":basketball:",":soccer:",":baseball:",":tennis:",":8ball:",":rugby_football:",":bowling:",":golf:",":mountain_bicyclist:",":bicyclist:",":horse_racing:",":snowboarder:",":swimmer:",":surfer:",":ski:"]


In [0]:
#Creating Emoji Features
captionRdd=finalDf.select("caption","timestamp").rdd.map(list)
captionRdd=captionRdd.map(lambda x: Row(timestamp=x[1],happy=int(getEmoji(str(x[0]),happyList)),sad=getEmoji(str(x[0]),sadnessList),love=getEmoji(str(x[0]),loveList),travel=getEmoji(str(x[0]),travelList),food=getEmoji(str(x[0]),foodList),pet=getEmoji(str(x[0]),petList),angry=getEmoji(str(x[0]),angryList),music=getEmoji(str(x[0]),musicList),party=getEmoji(str(x[0]),partyList),sport=getEmoji(str(x[0]),sportList)))
##merging dataframes
captionDf=spark.createDataFrame(captionRdd)
finalDf=finalDf.join(captionDf,on=['timestamp'],how='inner')
finalDf.cache()                              

In [0]:
def createVector(delta,K,likes):
  '''
  PROMISES: finds if a post is popular or not based on the eqn in paper (popular if likes of a post> (1+delta)* K)
  '''
  if(likes>((1+delta)*K)):
    return int(1)
  else:
    return int(0)

In [0]:
##creating an rdd with selected columns and checking wheter a post is popular or not 
targetVectorRdd=finalDf.select('timestamp','likes','mean_10','mean_30','mean_50').rdd.map(list).repartition(10)
targetVectorRdd=targetVectorRdd.map(lambda x: Row( timestamp=x[0],PC_0_10=createVector(0,x[2],x[1]),PC_05_10=createVector(0.05,x[2],x[1]),PC_1_10=createVector(0.1,x[2],x[1]),PC_15_10=createVector(0.15,x[2],x[1]),PC_0_30=createVector(0,x[3],x[1]),PC_05_30=createVector(0.05,x[3],x[1]),PC_1_30=createVector(0.1,x[3],x[1]),PC_15_30=createVector(0.15,x[3],x[1]),PC_0_50=createVector(0,x[4],x[1]),PC_05_50=createVector(0.05,x[4],x[1]),PC_1_50=createVector(0.1,x[4],x[1]),PC_15_50=createVector(0.15,x[4],x[1])))
##creating datafram and merging with the final df
targetVectorDf=spark.createDataFrame(targetVectorRdd)
finalDf=finalDf.join(targetVectorDf,on=['timestamp'],how='inner')
finalDf=finalDf.dropDuplicates(["timestamp",'caption'])
finalDf=finalDf.sort(F.asc('author'),F.asc('timestamp'))

In [0]:
finalDf.cache()
finalDf.count()
display(finalDf)

timestamp,num,author,num_follower,is_video,caption,likes,Time,DayOfWeek,Month,Year,Season,mean_5,mean_10,mean_15,mean_20,mean_30,mean_50,PrevPost_1,PrevPost_2,PrevPost_3,PrevPost_4,num_words,sentiment_score,hashtags_count,users_tagged,top1Tag,top2Tag,top3Tag,top4Tag,top5Tag,least1Tag,least2Tag,least3Tag,least4Tag,least5Tag,happy,sad,love,travel,food,pet,angry,music,party,sport,PC_0_10,PC_05_10,PC_1_10,PC_15_10,PC_0_30,PC_05_30,PC_1_30,PC_15_30,PC_0_50,PC_05_50,PC_1_50,PC_15_50
2019-02-25T21:03:00.000+0000,49,100montaditos.cagliari,960,False,Quanti like si merita il nostro montadito 52? 👍 Scatenatevi! #100Montaditos #foodporn #foodblogger #yummy #likeforlikes #photography #party #good #mood #dinner,22,21:3,2,2,2019,Winter,34.6,31.7,39.2,42.5,43.23333333,39.1,17,55,54,17.0,10,0,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2019-03-02T17:17:00.000+0000,48,100montaditos.cagliari,960,False,🤓⚽️noi siamo pronti e voi ?? Vi aspettiamo ⚽️🍽🍽🍷 #100montaditos #mood #food #instagram #ñino #eat #instafood #tbt #yummy #football,11,17:17,7,3,2019,Winter,33.0,29.0,36.53333333,41.55,43.2,39.1,22,17,55,54.0,9,0,10,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0
2019-03-03T01:22:00.000+0000,47,100montaditos.cagliari,960,False,"🤯🤯😍Non avete voglia di uscire ??state studiando e avete voglia di montaditos ?? Bene, siamo operativi su deliveroo per soddisfarvi e saziarvi !!♥️ @deliveroo_italy #deliveroo #foodporn #food #foodblogger #instalike #instagood #spain #look #mirame #picoftheday #cool #family #live #love #yummy #details #football #amazing #bestoftheday #100montaditos #italy",32,1:22,1,3,2019,Winter,31.8,28.8,33.13333333,38.55,42.26666667,38.58,11,22,17,55.0,23,0,21,1,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,1,1,1,0,0,0,0,0,0,0,0,0
2019-03-03T16:12:00.000+0000,46,100montaditos.cagliari,960,False,Il carnevale passa da 100m #carnevale #look #picoftheday #mood #nice #cute #best #beautiful #nice #amazing #pic #family #workout #work,40,16:12,1,3,2019,Winter,27.4,29.9,31.33333333,38.15,41.43333333,38.54,32,11,22,17.0,5,0,14,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,1,0,0,0
2019-03-07T18:50:00.000+0000,45,100montaditos.cagliari,960,False,Così finire di lavorare ha più gusto. #afterwork #100Montaditos 😊💻,24,18:50,5,3,2019,Winter,24.4,31.1,29.6,37.75,40.76666667,38.66,40,32,11,22.0,8,0,2,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2019-03-08T08:47:00.000+0000,44,100montaditos.cagliari,960,False,Auguri Donne ! Vi aspettiamo per saziarvi e rendere questo giorno ancora più speciale con i sapori della Spagna 🎉🎉🥰 #100montaditos #love #food,27,8:47,6,3,2019,Winter,25.8,30.2,29.73333333,35.85,38.83333333,38.56,24,40,32,11.0,20,0,3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
2019-03-14T19:55:00.000+0000,43,100montaditos.cagliari,960,False,"I migliori amici di sempre, qualche montaditos da condividere e quattro chiacchiere. Cosa c’è di meglio? 🙌 #100Montaditos",38,19:55,5,3,2019,Winter,26.8,29.9,28.26666667,34.1,38.3,38.44,27,24,40,32.0,17,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0
2019-04-01T17:28:00.000+0000,42,100montaditos.cagliari,960,False,Iniziamo con i nostri aperitivi spagnoli 🥂🍾🍷🍺 #tapas #bestfriends #foodporn #jamonjamon #jamoniberico #mood #drink #goodvibes #nice #spain #yummy #love #foodlover #foodblogger #bestoftheday #monday #likeforlikes,62,17:28,2,4,2019,Spring,32.2,32.0,29.93333333,32.9,39.03333333,38.56,38,27,24,40.0,7,0,17,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,1,1,1,1,1,1,1,1,1,1,1
2019-04-19T21:59:00.000+0000,41,100montaditos.cagliari,960,False,#happyester,27,21:59,6,4,2019,Spring,38.2,32.8,32.66666667,33.05,39.7,39.02,62,38,27,24.0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
2019-05-01T02:10:00.000+0000,40,100montaditos.cagliari,960,False,Vado io !!! #foodporn #food #followforfollowback #look #espana🇪🇸 #love #mood,38,2:10,4,5,2019,Spring,35.6,30.0,32.6,31.1,39.43333333,38.72,27,62,38,27.0,3,0,7,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,1,1,1,1,0,0,0,0,0,0,0,0


In [0]:
finalDf.printSchema()