In [31]:
import pandas as pd
import numpy as np
from collections import defaultdict
from collections import OrderedDict
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

def get_sentiment_change_mean(path):
    df_conversations = pd.read_csv(path,
                                 dtype={'id_str': 'str',
                                        'in_reply_to_screen_name':'str',
                                        'in_reply_to_status_id_str': 'str',
                                        'in_reply_to_user_id_str': 'str',
                                        'text': 'str',
                                        'timestamp_ms': 'float64',
                                        'user.id_str': 'str',
                                        'user.screen_name': 'str',
                                        'group_id': 'int'})
    
    # Add the second index for each reply within each group
    df_conversations['reply_index'] = df_conversations.groupby('group_index').cumcount() + 1
    

    # Set the two indexes for the DataFrame
    df_conversations.set_index(['group_index'], inplace=True)

    analyzer = SentimentIntensityAnalyzer()

    # Define a function to get the compound score for a sentence. Its range is from -1 to 1, with -1 being negative and 1 being positive. The range [-0.05, 0.05] is considered as neutral
    def get_sentiment_score(text):
        sentiment = analyzer.polarity_scores(text)
        return sentiment['compound']


    df_conversations['sentiment'] = df_conversations['text'].apply(get_sentiment_score)
    
    # Grouping by the first index of the MultiIndex
    grouped = df_conversations.groupby(level=0)

    # Filter conversations with at least 3 tweets
    filtered = grouped.filter(lambda x: len(x) >= 3)
    
    # Get the first level of the MultiIndex
    first_index = filtered.index.get_level_values(0)
    first_index_list = first_index.tolist()
    
    # Count number of tweets per convo, stored in dictionary
    consecutive_counter = {i : first_index_list.count(i) for i in first_index_list}
    # Create a sorted and ordered dictionary of length keys and convo index values
    res = defaultdict(list)
    for key, val in sorted(consecutive_counter.items()):
        res[val].append(key)
    dict1 = OrderedDict(sorted(res.items()))
    
    
    key_list = list(dict1.keys())
    value_list = list(dict1.values())
 
    sentiment_list = {}
    for keys in range(0, len(key_list)):
        for vals in value_list[keys]:
            temp = []
            for _, id_variable in filtered.loc[vals].iterrows():
                sentiment_variable = id_variable['sentiment']
                for index, value in filtered.loc[vals][::-1].iterrows():
                    if (value['user.id_str'] != '18332190'):
                        if (value['id_str'] == id_variable['id_str']):
                            break
                        if (value['user.id_str'] == id_variable['user.id_str']):
                            temp.append(value['sentiment'] - sentiment_variable)
                            break
                sentiment_list[key_list[keys]] = (sum(temp))

    sent_df = pd.DataFrame([sentiment_list])
    print(sent_df.to_string())

In [28]:
get_sentiment_change_mean("C:\\Users\\20211487\\index_testing\\conversations_total_easyjet.csv")


      3       4       5       6       7       8      9       10      11      12      13      14      15      16     17      18     19      20   21      22      23      24      25      26      27      28      29      30      31     32      33      34      35      36      37      38      39    41      44      45      46      47      48      50      54       56      58      59      63     65      67      68      73      83      84      87      88       91      92      94      96       99      109      110     121     124      148     157     237     279      308      339      390      569
0 -0.6363  0.1368 -0.0046  0.2136 -1.6171 -0.2178  4.017 -3.3741  5.2075  1.3788 -7.1623 -0.3444  0.1027  0.5448 -0.077 -2.2493 -1.053  1.5146    0 -3.5548 -0.1522  0.1839  0.9653  1.4823 -4.0174  3.2577  0.4153  1.5912 -2.0723 -1.577  1.0448  3.4214 -0.5566 -0.3138  2.1886 -1.2271 -1.5282  2.69 -4.2229 -0.2806  0.8258 -0.1618  0.3131 -0.3446  2.6305 -22.0561  3.4233  0.7212  1.4371 -2.844  1.7077  3.668

In [27]:
get_sentiment_change_mean("C:\\Users\\20211487\\index_testing\\conversations_total_BA.csv")


     3     4     5       6       7      8       9       10      11      12    13      14      15      16      17      18    19      20    21      23    24     25      27      28     29      32      33      34      37     38      39      40    42      43      45      51      52      53      56      64      65    66      69      70      74      85      90      94     108     111     112     118     122     135     141     160     167     185     198      231     234      252     269    282      353     379     625     752     760      1089    1153     1540     1742     2664
0  0.3942     0   0.0 -0.5522  1.1698 -1.067  0.7405 -0.1769  2.4102  4.2001  0.99  1.4658  0.4768  4.6036  0.4642 -0.0652     0 -1.2323     0  0.5673   0.0  0.176  0.5646  1.3353  4.465  2.3586  0.6869  1.8948 -1.1592  0.436  0.9652 -2.4868     0  0.9161  1.1315 -0.1245  0.8501 -0.1833  0.1374 -0.8562  0.9956     0 -1.0688 -3.4479  1.6635 -3.7234  0.2133 -2.7552 -6.406  2.8853 -1.6398 -0.9228  0.2461 -0.5705  0.7586 

In [29]:
get_sentiment_change_mean("C:\\Users\\20211487\\index_testing\\conversations_total_KLM.csv")


      3       4       5       6       7       8       9      10      11      12      13      14      15      16     17      18      19   20      21      22      24      25     26      27      28      29      30     31       32      33     34      35      37      38      40      41      42      43      44     48      51      52      53       54      55      57       62      64      73      75     77      87     94      103     138     149     152     153     160     174     221     223     391      460     463
0  0.3543  0.0206  0.4176  0.3182 -0.4215  0.9513  0.3466 -0.194  0.9993  0.7372 -1.0538 -0.1991  0.9047 -0.2064  0.642  5.1071 -0.5369    0 -5.2747  4.3807  3.1161  0.1402 -0.404  1.6029  0.7727  2.2836  1.7251  3.874 -18.3964  0.9008 -3.037 -1.8426 -5.3577 -1.0669 -3.2572  7.5693  5.0177 -5.9237 -4.5454  0.003 -7.7969 -6.3602 -2.9909  14.0277 -7.7531  1.5053 -11.9228  9.8542  2.6905  0.1455  5.966  1.5709 -3.471  8.0288 -0.9017 -5.1181  2.8226  5.0259  2.0916  3.2328 -7.4987  2.

In [None]:
get_sentiment_change_mean("C:\\Users\\20211487\\index_testing\\conversations_total_AirFrance.csv")


In [None]:
get_sentiment_change_mean("C:\\Users\\20211487\\index_testing\\conversations_total_AmericanAir.csv")


In [None]:
get_sentiment_change_mean("C:\\Users\\20211487\\index_testing\\conversations_total_EtihadAirways.csv")


In [None]:
get_sentiment_change_mean("C:\\Users\\20211487\\index_testing\\conversations_total_Lufthansa.csv")


In [None]:
get_sentiment_change_mean("C:\\Users\\20211487\\index_testing\\conversations_total_Qantas.csv")


In [None]:
get_sentiment_change_mean("C:\\Users\\20211487\\index_testing\\conversations_total_RyanAir.csv")


In [None]:
get_sentiment_change_mean("C:\\Users\\20211487\\index_testing\\conversations_total_SingaporeAir.csv")
