In [1]:
import pandas as pd
import ast

In [2]:
def aspects_to_dict(aspect_string):
    """Parse a stringified list of ``(aspect, score)`` pairs into a dict.

    Parameters
    ----------
    aspect_string : str
        Python-literal text such as ``"[('pricing', 0.0), ('miscellaneous', 0.15)]"``.
        ``None``, float NaN and empty/whitespace-only strings are treated as
        "no aspects scored".

    Returns
    -------
    dict
        Aspect name mapped to its score. Empty when the input is missing or blank.

    Raises
    ------
    ValueError, SyntaxError
        Propagated from ``ast.literal_eval`` when the text is not a valid
        Python literal.
    """
    # A blank CSV cell is loaded by pandas as float NaN rather than a string;
    # NaN is the only float that is not equal to itself.
    is_missing = aspect_string is None or (
        isinstance(aspect_string, float) and aspect_string != aspect_string
    )
    is_blank = isinstance(aspect_string, str) and not aspect_string.strip()
    if is_missing or is_blank:
        return {}

    # literal_eval only evaluates Python literals, so the text is never executed.
    return dict(ast.literal_eval(aspect_string))

## NSC DATA

In [3]:
# Load the scored NSC Reddit posts; the first CSV column is used as the row index.
nsc_data = pd.read_csv(
    "ABSA_Reddit_Results/nsc_run_1_res.csv",
    index_col=0,
)
nsc_data.tail()

Unnamed: 0,id,Title,Content,Author,Post Date,cleaned_results
1151,v4o448,Season Ticket Holder Scarf Question,For any of you that have the season ticket hol...,trillwilly69,2022-06-04 12:43:51,"[('stadium amenities', 0.13), ('pricing', -0.3..."
1152,ua9rgq,Bidding for our Hearts for Ukraine fundraiser ...,,Bootleggers_SG,2022-04-23 16:57:25,"[('miscellaneous', 0.15)]"
1153,tyl8nw,Someone tell me it's gonna be ok,I haven't received any kind of communications ...,R-Smelly,2022-04-07 19:52:58,"[('stadium amenities', 0.5)]"
1154,s786ep,Nashville Soccer Club Waives Miguel Nazarit,,JAShock,2022-01-18 21:18:33,"[('coaching and management', -0.73), ('pricing..."
1155,vifbqy,Vague?,,Actual_Illustrator59,2022-06-22 20:59:16,"[('team performance', -0.09), ('coaching and m..."


In [4]:
# Turn each stringified list of (aspect, score) pairs into a dict, kept as a
# one-column DataFrame named "cleaned_results".
new_cleaned_results = nsc_data["cleaned_results"].apply(aspects_to_dict).to_frame()

In [5]:
# Swap the raw string column for the parsed dictionaries.
nsc_data = nsc_data.drop(columns="cleaned_results")
nsc_data["cleaned_results"] = new_cleaned_results

# Order the posts by "Post Date" (sorted as read from the CSV, before the
# datetime conversion), renumber the rows from zero, then keep only the
# calendar date so the column can later be matched against game dates.
nsc_data_sorted = (
    nsc_data
    .sort_values(by="Post Date")
    .reset_index(drop=True)
    .assign(**{"Post Date": lambda posts: pd.to_datetime(posts["Post Date"]).dt.date})
)
nsc_data_sorted.tail()

Unnamed: 0,id,Title,Content,Author,Post Date,cleaned_results
1109,17s5yif,2023 end-of-season fan survey [OC],,speedwaysoccerpod,2023-11-10,"{'team performance': -0.61, 'stadium amenities..."
1110,17x7oyy,Homecoming Kit Sale,Adidas has a huge markdown on this in their Bl...,SportsSpectacular,2023-11-17,"{'pricing': 0.82, 'stadium amenities': 0.45, '..."
1111,17xk4ov,D.C. United hires Nashville SC’s Ally Mackay a...,,pasoud,2023-11-17,"{'coaching and management': 0.17, 'miscellaneo..."
1112,183py9a,Hany Mukhtar is making the case that he is mor...,,ottersaregreat10,2023-11-25,"{'team performance': -0.89, 'stadium atmospher..."
1113,1840b9u,ISO Matchday Posters,Hello! I am looking to buy matchday posters fr...,beanerwiener8,2023-11-26,{'miscellaneous': -0.6}


In [6]:
aspects = [
    "team performance",
    "coaching and management",
    "stadium atmosphere",
    "stadium amenities",
    "pricing",
    "media coverage",
    "miscellaneous",
]

# Give every aspect its own score column; a post that has no score for an
# aspect gets None in that column.
for aspect in aspects:
    nsc_data_sorted[aspect] = nsc_data_sorted["cleaned_results"].apply(
        lambda scores: None if (not scores or aspect not in scores) else scores.get(aspect)
    )

# The dictionaries are now fully unpacked, so the source column can go.
nsc_data_sorted = nsc_data_sorted.drop(columns="cleaned_results")
nsc_data_sorted.tail()

Unnamed: 0,id,Title,Content,Author,Post Date,team performance,coaching and management,stadium atmosphere,stadium amenities,pricing,media coverage,miscellaneous
1109,17s5yif,2023 end-of-season fan survey [OC],,speedwaysoccerpod,2023-11-10,-0.61,-0.5,0.0,0.0,0.0,0.56,0.31
1110,17x7oyy,Homecoming Kit Sale,Adidas has a huge markdown on this in their Bl...,SportsSpectacular,2023-11-17,,,,0.45,0.82,,0.77
1111,17xk4ov,D.C. United hires Nashville SC’s Ally Mackay a...,,pasoud,2023-11-17,,0.17,,,,,0.05
1112,183py9a,Hany Mukhtar is making the case that he is mor...,,ottersaregreat10,2023-11-25,-0.89,,0.23,,,,0.0
1113,1840b9u,ISO Matchday Posters,Hello! I am looking to buy matchday posters fr...,beanerwiener8,2023-11-26,,,,,,,-0.6


In [7]:
# Load the NSC game log, rename "Date" to "Game Date", and reduce it to a plain
# calendar date (same representation as "Post Date" in the posts table).
nsc_games = (
    pd.read_csv("Games Data Cleaned/NSC Games.csv", index_col=0)
    .rename(columns={"Date": "Game Date"})
    .assign(**{"Game Date": lambda games: pd.to_datetime(games["Game Date"]).dt.date})
)
nsc_games.head()

Unnamed: 0,Game Date,Comp,Round,Venue,Result,Score,Opponent,xG,xGA,Attendance,Percent
0,2022-02-27,MLS,Regular Season,Away,W,1,0,1.0,0.5,30013,0.0
1,2022-03-05,MLS,Regular Season,Away,D,1,1,1.7,1.9,18107,0.0
2,2022-03-12,MLS,Regular Season,Away,L,0,2,0.2,1.7,13448,0.0
3,2022-03-19,MLS,Regular Season,Away,L,1,2,2.3,0.9,20036,0.0
4,2022-04-02,MLS,Regular Season,Away,W,1,0,0.9,1.7,20371,0.0


In [8]:
# Attach same-day game details to each post. This is a left join, so posts
# whose "Post Date" matches no "Game Date" keep NaN in the game columns.
nsc_merged = nsc_data_sorted.merge(
    nsc_games,
    how="left",
    left_on="Post Date",
    right_on="Game Date",
)

# A left join repeats a post once per matching game row, so a duplicated
# "Game Date" would silently inflate the number of posts. Fail loudly instead.
assert len(nsc_merged) == len(nsc_data_sorted), (
    "merge changed the number of posts: 'Game Date' is not unique in nsc_games"
)
nsc_merged.tail()

Unnamed: 0,id,Title,Content,Author,Post Date,team performance,coaching and management,stadium atmosphere,stadium amenities,pricing,...,Comp,Round,Venue,Result,Score,Opponent,xG,xGA,Attendance,Percent
1109,17s5yif,2023 end-of-season fan survey [OC],,speedwaysoccerpod,2023-11-10,-0.61,-0.5,0.0,0.0,0.0,...,,,,,,,,,,
1110,17x7oyy,Homecoming Kit Sale,Adidas has a huge markdown on this in their Bl...,SportsSpectacular,2023-11-17,,,,0.45,0.82,...,,,,,,,,,,
1111,17xk4ov,D.C. United hires Nashville SC’s Ally Mackay a...,,pasoud,2023-11-17,,0.17,,,,...,,,,,,,,,,
1112,183py9a,Hany Mukhtar is making the case that he is mor...,,ottersaregreat10,2023-11-25,-0.89,,0.23,,,...,,,,,,,,,,
1113,1840b9u,ISO Matchday Posters,Hello! I am looking to buy matchday posters fr...,beanerwiener8,2023-11-26,,,,,,...,,,,,,,,,,


In [9]:
# Lift pandas' row/column display limits for this cell only, so every merged
# column is visible for the rows labelled 32 through 35 (.loc is inclusive).
with pd.option_context("display.max_rows", None, "display.max_columns", None):
    display(nsc_merged.loc[32:35])

Unnamed: 0,id,Title,Content,Author,Post Date,team performance,coaching and management,stadium atmosphere,stadium amenities,pricing,media coverage,miscellaneous,Game Date,Comp,Round,Venue,Result,Score,Opponent,xG,xGA,Attendance,Percent
32,szyl0j,"Aké Loba: is it just me, or is he noticeably a...",,deltasig1985,2022-02-24,-0.05,0.12,,,,-0.5,-0.2,,,,,,,,,,,
33,t2nb5w,ahem....,LETSSSSSSSSSSSSSSSSGGOOOOOOOOOOOOOOOOOOOOOOOOO...,wolfhickey,2022-02-27,,,0.82,,,,,2022-02-27,MLS,Regular Season,Away,W,1.0,0.0,1.0,0.5,30013.0,0.0
34,t321fu,Here it is: Your opening day lineup.,,Moderator451,2022-02-28,0.28,0.29,-0.73,0.85,-0.95,0.92,,,,,,,,,,,,
35,t3575e,[Post-Match thread] Seattle Sounders vs Nashvi...,# [Major League Soccer - 2022/2023]\r\n#Seattl...,MatchCaster,2022-02-28,0.65,,0.6,,,,-0.3,,,,,,,,,,,


Write the merged NSC posts-and-games table to a CSV file

In [10]:
# Save the merged table; the row index is not written to the file.
nsc_merged.to_csv(
    "Scored Data With Games/nsc_scored_with_games.csv",
    index=False,
)

## PREDS DATA

In [11]:
# Load the scored Preds Reddit posts; the first CSV column is used as the row index.
preds_data = pd.read_csv(
    "ABSA_Reddit_Results/preds_run_1_res.csv",
    index_col=0,
)
preds_data.tail()

Unnamed: 0,id,Title,Content,Author,Post Date,cleaned_results
964,11d3381,"What the addition of Foote, probably means to us.",McDonagh paired with Foote in TB a lot last ye...,MusicCityJayhawk,2023-02-27 04:51:11,"[('coaching and management', 0.18), ('team per..."
965,w4p1xv,Just an idea for a Matthew Tkachuck trade,,mk1700,2022-07-21 19:14:00,"[('team performance', -0.32), ('miscellaneous'..."
966,t0v4ns,Can someone make this a shirt or hat or someth...,,aceofwades,2022-02-25 04:42:42,"[('miscellaneous', 0.28)]"
967,zjfega,Smashville Stocking Stuffer,Does anyone know how seating is decided? Ex. G...,jnkeegan,2022-12-11 23:57:51,"[('stadium amenities', -0.88), ('miscellaneous..."
968,ujsxg8,On ticket master is the lower the letter in th...,Trying to buy a ticket,Bakio-bay,2022-05-06 17:42:19,"[('pricing', 0.0), ('stadium amenities', 0.35)]"


In [12]:
# Turn each stringified list of (aspect, score) pairs into a dict, kept as a
# one-column DataFrame named "cleaned_results".
new_preds_cleaned_results = preds_data["cleaned_results"].apply(aspects_to_dict).to_frame()

In [13]:
# Swap the raw string column for the parsed dictionaries.
preds_data = preds_data.drop(columns="cleaned_results")
preds_data["cleaned_results"] = new_preds_cleaned_results

# Order the posts by "Post Date" (sorted as read from the CSV, before the
# datetime conversion), renumber the rows from zero, then keep only the
# calendar date so the column can later be matched against game dates.
preds_data_sorted = (
    preds_data
    .sort_values(by="Post Date")
    .reset_index(drop=True)
    .assign(**{"Post Date": lambda posts: pd.to_datetime(posts["Post Date"]).dt.date})
)
preds_data_sorted.tail()

Unnamed: 0,id,Title,Content,Author,Post Date,cleaned_results
933,183omt4,Who is the Preds biggest rival right now?,Was thinking about it this week. Over our team...,ConBurgundy15,2023-11-25,"{'team performance': 0.0, 'stadium amenities':..."
934,183v1a6,Has anybody ever bought this? I need to know,https://nashvillelockerroom.com/life-size-gnas...,ZeldaTheOuchMouse,2023-11-25,"{'pricing': -0.1, 'stadium amenities': 0.0}"
935,1844ldg,Keep an eye on Svechkov,"Folks, keep an eye on Svechkov. Several of the...",TwinTowersJenga,2023-11-26,"{'team performance': 0.34, 'coaching and manag..."
936,184btbo,Customized Reverse Retro Jersey,Hey everyone! Wife and I are going to the Jet...,Austinopril,2023-11-26,"{'stadium amenities': 0.25, 'pricing': -0.03}"
937,184ezp4,Juuse always starts slow,,Over-One-8,2023-11-26,"{'team performance': -0.36, 'coaching and mana..."


In [14]:
aspects = [
    "team performance",
    "coaching and management",
    "stadium atmosphere",
    "stadium amenities",
    "pricing",
    "media coverage",
    "miscellaneous",
]

# Give every aspect its own score column; a post that has no score for an
# aspect gets None in that column.
for aspect in aspects:
    preds_data_sorted[aspect] = preds_data_sorted["cleaned_results"].apply(
        lambda scores: None if (not scores or aspect not in scores) else scores.get(aspect)
    )

# The dictionaries are now fully unpacked, so the source column can go.
preds_data_sorted = preds_data_sorted.drop(columns="cleaned_results")
preds_data_sorted.tail()

Unnamed: 0,id,Title,Content,Author,Post Date,team performance,coaching and management,stadium atmosphere,stadium amenities,pricing,media coverage,miscellaneous
933,183omt4,Who is the Preds biggest rival right now?,Was thinking about it this week. Over our team...,ConBurgundy15,2023-11-25,0.0,0.02,-0.05,0.08,,0.04,0.16
934,183v1a6,Has anybody ever bought this? I need to know,https://nashvillelockerroom.com/life-size-gnas...,ZeldaTheOuchMouse,2023-11-25,,,,0.0,-0.1,,
935,1844ldg,Keep an eye on Svechkov,"Folks, keep an eye on Svechkov. Several of the...",TwinTowersJenga,2023-11-26,0.34,0.18,0.06,,,,
936,184btbo,Customized Reverse Retro Jersey,Hey everyone! Wife and I are going to the Jet...,Austinopril,2023-11-26,,,,0.25,-0.03,,
937,184ezp4,Juuse always starts slow,,Over-One-8,2023-11-26,-0.36,-0.24,,,,,0.17


In [15]:
# Load the Preds game log, rename "Date" to "Game Date", and reduce it to a
# plain calendar date (same representation as "Post Date" in the posts table).
preds_games = (
    pd.read_csv("Games Data Cleaned/Preds Games.csv", index_col=0)
    .rename(columns={"Date": "Game Date"})
    .assign(**{"Game Date": lambda games: pd.to_datetime(games["Game Date"]).dt.date})
)
preds_games.head()

Unnamed: 0,Game Date,Venue,Score,Opponent,Result,Attendance,Percent
0,2022-01-01,Home,6,1,W,17504,0.982102
1,2022-01-04,Away,3,2,W,17804,0.0
2,2022-01-06,Away,4,2,W,14359,0.0
3,2022-01-08,Away,4,2,W,10317,0.0
4,2022-01-11,Home,5,4,W,17159,0.962745


In [16]:
# Attach same-day game details to each post. This is a left join, so posts
# whose "Post Date" matches no "Game Date" keep NaN in the game columns.
preds_merged = preds_data_sorted.merge(
    preds_games,
    how="left",
    left_on="Post Date",
    right_on="Game Date",
)

# A left join repeats a post once per matching game row, so a duplicated
# "Game Date" would silently inflate the number of posts. Fail loudly instead.
assert len(preds_merged) == len(preds_data_sorted), (
    "merge changed the number of posts: 'Game Date' is not unique in preds_games"
)
preds_merged.tail()

Unnamed: 0,id,Title,Content,Author,Post Date,team performance,coaching and management,stadium atmosphere,stadium amenities,pricing,media coverage,miscellaneous,Game Date,Venue,Score,Opponent,Result,Attendance,Percent
933,183omt4,Who is the Preds biggest rival right now?,Was thinking about it this week. Over our team...,ConBurgundy15,2023-11-25,0.0,0.02,-0.05,0.08,,0.04,0.16,,,,,,,
934,183v1a6,Has anybody ever bought this? I need to know,https://nashvillelockerroom.com/life-size-gnas...,ZeldaTheOuchMouse,2023-11-25,,,,0.0,-0.1,,,,,,,,,
935,1844ldg,Keep an eye on Svechkov,"Folks, keep an eye on Svechkov. Several of the...",TwinTowersJenga,2023-11-26,0.34,0.18,0.06,,,,,,,,,,,
936,184btbo,Customized Reverse Retro Jersey,Hey everyone! Wife and I are going to the Jet...,Austinopril,2023-11-26,,,,0.25,-0.03,,,,,,,,,
937,184ezp4,Juuse always starts slow,,Over-One-8,2023-11-26,-0.36,-0.24,,,,,0.17,,,,,,,


In [17]:
# Lift pandas' row/column display limits for this cell only, so every merged
# column is visible for the rows labelled 338 through 343 (.loc is inclusive).
with pd.option_context("display.max_rows", None, "display.max_columns", None):
    display(preds_merged.loc[338:343])

Unnamed: 0,id,Title,Content,Author,Post Date,team performance,coaching and management,stadium atmosphere,stadium amenities,pricing,media coverage,miscellaneous,Game Date,Venue,Score,Opponent,Result,Attendance,Percent
338,123vfz3,Jordan Gross (NASH) on waivers today,,Jmthrows,2023-03-27,-0.05,0.12,0.0,0.0,0.0,0.14,0.04,,,,,,,
339,124bjdu,Help for Covenant Kids,EDIT: I have heard from someone at the preds t...,KaleidoscopeOk1346,2023-03-28,,,,,,,0.43,2023-03-28,Away,2.0,1.0,W,17850.0,0.0
340,124sx95,McCarron and Gravel up to NSH,,TwinTowersJenga,2023-03-28,0.59,,,,,,0.16,2023-03-28,Away,2.0,1.0,W,17850.0,0.0
341,1253nxk,[Scott McLaughlin] The Bruins will be showing ...,,Kroger453PredsFan,2023-03-28,,,0.75,,,,,2023-03-28,Away,2.0,1.0,W,17850.0,0.0
342,1256uiz,Who are your favorite players?,"Hey smashville, who are your favorite players?...",meinchman,2023-03-28,0.33,0.17,0.1,0.0,0.0,0.0,0.14,2023-03-28,Away,2.0,1.0,W,17850.0,0.0
343,12587h4,Caleb for Coach,,Dalanard,2023-03-29,,0.16,,,,,0.55,,,,,,,


Write the merged Preds posts-and-games table to a CSV file

In [18]:
# Save the merged table; the row index is not written to the file.
preds_merged.to_csv(
    "Scored Data With Games/preds_scored_with_games.csv",
    index=False,
)

## TITANS DATA

In [19]:
# Load the scored Titans Reddit posts; the first CSV column is used as the row index.
titans_data = pd.read_csv(
    "ABSA_Reddit_Results/titans_run_1_res.csv",
    index_col=0,
)
titans_data.tail()

Unnamed: 0,id,Title,Content,Author,Post Date,cleaned_results
1551,x0eryx,Titans Defense,This is NOT one of those “We get no respect 😠”...,rootbeercaveman,2022-08-29 03:51:14,"[('team performance', 0.14), ('media coverage'..."
1552,uu12r4,Derrick Henry buys new house in Dallas?,,BuffaloKiller937,2022-05-20 16:23:27,"[('miscellaneous', 0.16)]"
1553,s408wb,"Who is better historically? Texans or Titans,","Debate time, which of those two are more succe...",PissedoffNflNerd,2022-01-14 19:19:48,"[('team performance', -0.43), ('media coverage..."
1554,13ir53i,Do you want to be hated?,"So because we have Levis, who seems to be univ...",Emilio_Estevezz,2023-05-16 01:32:40,"[('team performance', 0.27), ('media coverage'..."
1555,tmluss,Ryan Tannehill. Having criticisms and doubts a...,,Deceptivejunk,2022-03-24 16:34:09,"[('team performance', -0.07), ('coaching and m..."


In [20]:
# Turn each stringified list of (aspect, score) pairs into a dict, kept as a
# one-column DataFrame named "cleaned_results".
new_titans_cleaned_results = titans_data["cleaned_results"].apply(aspects_to_dict).to_frame()

In [21]:
# Swap the raw string column for the parsed dictionaries.
titans_data = titans_data.drop(columns="cleaned_results")
titans_data["cleaned_results"] = new_titans_cleaned_results

# Order the posts by "Post Date" (sorted as read from the CSV, before the
# datetime conversion), renumber the rows from zero, then keep only the
# calendar date so the column can later be matched against game dates.
titans_data_sorted = (
    titans_data
    .sort_values(by="Post Date")
    .reset_index(drop=True)
    .assign(**{"Post Date": lambda posts: pd.to_datetime(posts["Post Date"]).dt.date})
)
titans_data_sorted.tail()

Unnamed: 0,id,Title,Content,Author,Post Date,cleaned_results
1511,183wmqw,[Dianna Russini] “Sorry Buckeyes. You can cros...,,derrick22henry,2023-11-25,"{'team performance': -0.1, 'media coverage': -..."
1512,18494ao,Lions Fan Coming In Peace With a Random Questi...,Some of us were discussing this during the Mic...,MichiganMedium,2023-11-26,"{'team performance': 0.35, 'coaching and manag..."
1513,184dchi,Hellman’s added a last minute stop to their ga...,No kidding. They added this stop Friday. That’...,FilledUpTinCup,2023-11-26,{'miscellaneous': 0.21}
1514,184e5iq,Gobble Hill says gobble gobble up!,,Toastfrom2069,2023-11-26,{'miscellaneous': -0.28}
1515,184exhx,Can Mods Please Change the Sub desktop Pic,It's Burks catching a pass vs the Bengals in a...,JustStrolling_,2023-11-26,{'miscellaneous': 0.03}


In [22]:
aspects = [
    "team performance",
    "coaching and management",
    "stadium atmosphere",
    "stadium amenities",
    "pricing",
    "media coverage",
    "miscellaneous",
]

# Give every aspect its own score column; a post that has no score for an
# aspect gets None in that column.
for aspect in aspects:
    titans_data_sorted[aspect] = titans_data_sorted["cleaned_results"].apply(
        lambda scores: None if (not scores or aspect not in scores) else scores.get(aspect)
    )

# The dictionaries are now fully unpacked, so the source column can go.
titans_data_sorted = titans_data_sorted.drop(columns="cleaned_results")
titans_data_sorted.tail()

Unnamed: 0,id,Title,Content,Author,Post Date,team performance,coaching and management,stadium atmosphere,stadium amenities,pricing,media coverage,miscellaneous
1511,183wmqw,[Dianna Russini] “Sorry Buckeyes. You can cros...,,derrick22henry,2023-11-25,-0.1,-0.03,,,,-0.15,
1512,18494ao,Lions Fan Coming In Peace With a Random Questi...,Some of us were discussing this during the Mic...,MichiganMedium,2023-11-26,0.35,-0.15,-0.28,0.07,,0.09,0.0
1513,184dchi,Hellman’s added a last minute stop to their ga...,No kidding. They added this stop Friday. That’...,FilledUpTinCup,2023-11-26,,,,,,,0.21
1514,184e5iq,Gobble Hill says gobble gobble up!,,Toastfrom2069,2023-11-26,,,,,,,-0.28
1515,184exhx,Can Mods Please Change the Sub desktop Pic,It's Burks catching a pass vs the Bengals in a...,JustStrolling_,2023-11-26,,,,,,,0.03


In [23]:
# Load the Titans game log, rename "Date" to "Game Date", and reduce it to a
# plain calendar date (same representation as "Post Date" in the posts table).
titans_games = (
    pd.read_csv("Games Data Cleaned/Titans Games.csv", index_col=0)
    .rename(columns={"Date": "Game Date"})
    .assign(**{"Game Date": lambda games: pd.to_datetime(games["Game Date"]).dt.date})
)
titans_games.head()

Unnamed: 0,Game Date,Result,Venue,Score,Opponent,Attendance,Percent
0,2022-01-02,W,Home,34,3,69091,0.994702
1,2022-01-09,W,Away,28,25,66722,0.0
2,2022-01-22,L,Home,16,19,69242,0.996876
3,2022-09-11,L,Home,20,21,67805,0.976187
4,2022-09-19,L,Away,7,41,70683,0.0


In [24]:
# Attach same-day game details to each post. This is a left join, so posts
# whose "Post Date" matches no "Game Date" keep NaN in the game columns.
titans_merged = titans_data_sorted.merge(
    titans_games,
    how="left",
    left_on="Post Date",
    right_on="Game Date",
)

# A left join repeats a post once per matching game row, so a duplicated
# "Game Date" would silently inflate the number of posts. Fail loudly instead.
assert len(titans_merged) == len(titans_data_sorted), (
    "merge changed the number of posts: 'Game Date' is not unique in titans_games"
)
titans_merged.tail()

Unnamed: 0,id,Title,Content,Author,Post Date,team performance,coaching and management,stadium atmosphere,stadium amenities,pricing,media coverage,miscellaneous,Game Date,Result,Venue,Score,Opponent,Attendance,Percent
1511,183wmqw,[Dianna Russini] “Sorry Buckeyes. You can cros...,,derrick22henry,2023-11-25,-0.1,-0.03,,,,-0.15,,,,,,,,
1512,18494ao,Lions Fan Coming In Peace With a Random Questi...,Some of us were discussing this during the Mic...,MichiganMedium,2023-11-26,0.35,-0.15,-0.28,0.07,,0.09,0.0,,,,,,,
1513,184dchi,Hellman’s added a last minute stop to their ga...,No kidding. They added this stop Friday. That’...,FilledUpTinCup,2023-11-26,,,,,,,0.21,,,,,,,
1514,184e5iq,Gobble Hill says gobble gobble up!,,Toastfrom2069,2023-11-26,,,,,,,-0.28,,,,,,,
1515,184exhx,Can Mods Please Change the Sub desktop Pic,It's Burks catching a pass vs the Bengals in a...,JustStrolling_,2023-11-26,,,,,,,0.03,,,,,,,


In [25]:
# Lift pandas' row/column display limits for this cell only, so every merged
# column is visible for the rows labelled 216 through 221 (.loc is inclusive).
with pd.option_context("display.max_rows", None, "display.max_columns", None):
    display(titans_merged.loc[216:221])

Unnamed: 0,id,Title,Content,Author,Post Date,team performance,coaching and management,stadium atmosphere,stadium amenities,pricing,media coverage,miscellaneous,Game Date,Result,Venue,Score,Opponent,Attendance,Percent
216,xmzlij,Everything That Could Go Wrong Has 😥,,IronMango1,2022-09-24,-0.07,0.19,0.07,,0.17,,-0.07,,,,,,,
217,xnxsdh,That Second Half 😬,,IronMango1,2022-09-25,0.51,-0.14,0.0,,,,,2022-09-25,W,Home,24.0,22.0,68649.0,0.988338
218,xnxwnl,Doesn't feel like a win,"I am finding it very, very hard to watch these...",Halo_LAN_Party_2nite,2022-09-25,0.27,,-0.07,,,,,2022-09-25,W,Home,24.0,22.0,68649.0,0.988338
219,xo14hf,Hot topic: Todd Downing called a very good gam...,2nd half was more of bad luck at the end of dr...,air_volek07,2022-09-25,0.02,-0.1,,,,-0.09,,2022-09-25,W,Home,24.0,22.0,68649.0,0.988338
220,xo4sv7,[Mike Herndon] #Titans rookie punter Ryan Ston...,,HenryTheTitan,2022-09-26,0.12,0.0,,,,,,,,,,,,
221,xo572p,Why can’t I just be happy?,"Glad for the win, but I’m still bitter about u...",baby_NayNay,2022-09-26,-0.05,-0.17,0.06,,,,,,,,,,,


Write the merged Titans posts-and-games table to a CSV file

In [26]:
# Save the merged table; the row index is not written to the file.
titans_merged.to_csv(
    "Scored Data With Games/titans_scored_with_games.csv",
    index=False,
)