In [1]:
import pandas as pd
import numpy as np
from collections import defaultdict
from collections import OrderedDict
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

def get_sentiment_change_mean(path):
    df_conversations = pd.read_csv(path,
                                 dtype={'id_str': 'str',
                                        'in_reply_to_screen_name':'str',
                                        'in_reply_to_status_id_str': 'str',
                                        'in_reply_to_user_id_str': 'str',
                                        'text': 'str',
                                        'timestamp_ms': 'float64',
                                        'user.id_str': 'str',
                                        'user.screen_name': 'str',
                                        'group_id': 'int'})
    
    # Add the second index for each reply within each group
    df_conversations['reply_index'] = df_conversations.groupby('group_index').cumcount() + 1
    

    # Set the two indexes for the DataFrame
    df_conversations.set_index(['group_index'], inplace=True)

    analyzer = SentimentIntensityAnalyzer()

    # Define a function to get the compound score for a sentence. Its range is from -1 to 1, with -1 being negative and 1 being positive. The range [-0.05, 0.05] is considered as neutral
    def get_sentiment_score(text):
        sentiment = analyzer.polarity_scores(text)
        return sentiment['compound']


    df_conversations['sentiment'] = df_conversations['text'].apply(get_sentiment_score)
    
    # Grouping by the first index of the MultiIndex
    grouped = df_conversations.groupby(level=0)

    # Filter conversations with at least 3 tweets
    filtered = grouped.filter(lambda x: len(x) >= 3)
    
    # Get the first level of the MultiIndex
    first_index = filtered.index.get_level_values(0)
    first_index_list = first_index.tolist()
    
    # Count number of tweets per convo, stored in dictionary
    consecutive_counter = {i : first_index_list.count(i) for i in first_index_list}
    # Create a sorted and ordered dictionary of length keys and convo index values
    res = defaultdict(list)
    for key, val in sorted(consecutive_counter.items()):
        res[val].append(key)
    dict1 = OrderedDict(sorted(res.items()))
    
    
    key_list = list(dict1.keys())
        
    value_list = list(dict1.values())
                           
    sentiment_list = {}
    for keys in range(0, len(key_list)):
        for vals in value_list[keys]:
            temp = []
            for _, id_variable in filtered.loc[vals].iterrows():
                sentiment_variable = id_variable['sentiment']
                for index, value in filtered.loc[vals][::-1].iterrows():
                    if (value['id_str'] == id_variable['id_str']):
                        break
                    if (value['user.id_str'] == id_variable['user.id_str']):
                        temp.append(value['sentiment'] - sentiment_variable)
                        break
            sentiment_list[key_list[keys]] = (sum(temp))


    print(sentiment_list)

In [2]:
get_sentiment_change_mean("C:\\Users\\20211487\\index_testing\\conversations_total_AirFrance.csv")


{3: -0.1531, 4: 1.1372999999999998, 5: -0.10439999999999999, 6: 0.3967, 7: -0.10120000000000007, 8: -0.04610000000000003, 9: -0.09739999999999999, 10: 0.3120000000000001, 11: 0.5232999999999999, 12: 1.2467000000000001, 13: -1.077, 15: -5.4487000000000005, 16: 2.347, 17: 0.19830000000000003, 20: -3.4629000000000003, 23: -1.5604, 26: -0.5633999999999999, 27: 0.05249999999999999, 30: 3.0841999999999996, 51: -8.665600000000001, 111: -1.0876000000000001, 182: 8.8074, 187: -9.0188}


In [3]:
get_sentiment_change_mean("C:\\Users\\20211487\\index_testing\\conversations_total_AmericanAir.csv")


KeyboardInterrupt: 

In [4]:
get_sentiment_change_mean("C:\\Users\\20211487\\index_testing\\conversations_total_BA.csv")


{3: 0.3942, 4: 0, 5: 0.0, 6: -0.5522, 7: 0.6644, 8: -0.09599999999999986, 9: -0.15249999999999997, 10: -1.0558999999999998, 11: 6.4521999999999995, 12: 2.7364000000000006, 13: 2.5802, 14: 1.4829, 15: 0.3117000000000001, 16: 8.4978, 17: 0.5843, 18: 0.3259, 19: 0, 20: -0.9105000000000003, 21: 0, 23: 1.5657, 24: 0.8444, 25: 1.1170999999999998, 27: 1.7008999999999994, 28: 0.059499999999999775, 29: 4.801399999999999, 32: 6.7189, 33: 0.6869000000000001, 34: 1.8948, 37: -1.9951999999999999, 38: 0.6262000000000001, 39: 0.8867999999999998, 40: -2.4867999999999997, 42: 0.14950000000000002, 43: -3.9202, 45: 0.46759999999999985, 51: -6.2604, 52: -3.7361, 53: 0.6386, 56: 1.5341000000000002, 64: 2.5936999999999997, 65: -1.6451999999999998, 66: -0.521, 69: -1.0009000000000001, 70: -3.9525999999999994, 74: -1.2878, 85: -7.4501, 90: 12.279600000000002, 94: -10.167100000000001, 108: 3.8469000000000015, 111: -6.7082, 112: -5.1351, 118: -0.9228000000000001, 122: -6.999999999999999, 135: -3.805899999999999

In [5]:
get_sentiment_change_mean("C:\\Users\\20211487\\index_testing\\conversations_total_easyjet.csv")


{3: -0.6363, 4: 0.13680000000000003, 5: -0.0045999999999999375, 6: 0.2136, 7: -1.6171, 8: -0.2178000000000001, 9: 4.0169999999999995, 10: -3.3741000000000003, 11: 5.2075000000000005, 12: 1.3788, 13: -7.1623, 14: -0.34440000000000004, 15: 0.1027, 16: 0.5448, 17: -0.07700000000000001, 18: -2.2493, 19: -1.053, 20: 1.5146000000000002, 21: 0, 22: -3.5548, 23: -0.1522, 24: 0.18389999999999995, 25: 0.9653, 26: 1.4822999999999997, 27: -4.0174, 28: 3.2577, 29: 0.41529999999999995, 30: 1.5912000000000002, 31: -2.0723, 32: -1.577, 33: 1.0448, 34: 3.421400000000001, 35: -0.5566, 36: -0.31379999999999963, 37: 2.1886, 38: -1.2270999999999999, 39: -1.5281999999999998, 41: 2.6900000000000004, 44: -4.2229, 45: -0.2806, 46: 0.8257999999999999, 47: -0.1617999999999995, 48: 0.31310000000000004, 50: -0.34460000000000013, 54: 2.6305, 56: -22.056099999999997, 58: 3.4233000000000002, 59: 0.7212000000000001, 63: 1.4370999999999996, 65: -2.8440000000000003, 67: 1.7076999999999998, 68: 3.6682999999999995, 73: -2

In [10]:
get_sentiment_change_mean("C:\\Users\\20211487\\index_testing\\conversations_total_EtihadAirways.csv")


0.15855939393939394

In [6]:
get_sentiment_change_mean("C:\\Users\\20211487\\index_testing\\conversations_total_KLM.csv")


{3: 0.3543, 4: 0.02059999999999995, 5: 0.41759999999999997, 6: 0.3182, 7: -0.4215, 8: 0.9512999999999997, 9: 0.3466000000000001, 10: -0.19399999999999995, 11: 0.9993000000000001, 12: 0.7371999999999999, 13: -1.0537999999999998, 14: -0.1991, 15: 0.9047000000000001, 16: -0.20639999999999997, 17: 0.6419999999999999, 18: 5.107099999999999, 19: -0.5368999999999993, 20: 0, 21: -5.274699999999999, 22: 4.3807, 24: 3.1161000000000003, 25: 0.1402, 26: -0.4039999999999999, 27: 1.6029000000000002, 28: 0.7726999999999999, 29: 2.2836, 30: 1.7250999999999999, 31: 3.8739999999999997, 32: -18.3964, 33: 0.9007999999999998, 34: -3.0369999999999995, 35: -1.8425999999999996, 37: -5.3577, 38: -1.0669, 40: -3.257199999999999, 41: 7.5693, 42: 5.0177000000000005, 43: -5.923700000000001, 44: -4.545400000000001, 48: 0.002999999999999281, 51: -7.796899999999998, 52: -6.360200000000002, 53: -2.9909, 54: 14.0277, 55: -7.753100000000001, 57: 1.5052999999999996, 62: -11.9228, 64: 9.854200000000002, 73: 2.690499999999

In [12]:
get_sentiment_change_mean("C:\\Users\\20211487\\index_testing\\conversations_total_Lufthansa.csv")


0.15900756143667308

In [13]:
get_sentiment_change_mean("C:\\Users\\20211487\\index_testing\\conversations_total_Qantas.csv")


0.20893318181818205

In [14]:
get_sentiment_change_mean("C:\\Users\\20211487\\index_testing\\conversations_total_RyanAir.csv")


0.15190481283422486

In [15]:
get_sentiment_change_mean("C:\\Users\\20211487\\index_testing\\conversations_total_SingaporeAir.csv")


0.1847798165137615

In [16]:
get_sentiment_change_mean("C:\\Users\\20211487\\index_testing\\conversations_total_VirginAtlantic.csv")

0.13421927480916038