# 3. Analysis
##  3.6. Q6: Comparison of a duration of news coverage on Ukraine with Russians attitudes towards Ukraine 


In [1]:
import re
import pandas as pd
import numpy as np
import altair as alt
from altair import datum


In [67]:
%run "Functions to use in analysis.ipynb"

## Data Loading

In [3]:
# path = "G:/My Drive/01.MADS Courses/SIADS 593 Milestone I/02.Data Collection and Cleaning/df_tokenized.csv"
path = "data/df_tokenized.csv"
df_scraped = pd.read_csv(path)
df_scraped.drop(columns=["Unnamed: 0"], inplace=True)
df_scraped = df_scraped[df_scraped["whole newscast"] == False]
df_scraped["date"] = pd.to_datetime(df_scraped["date"])


In [None]:
"The scraped dataframe contains {} rows".format(df_scraped.shape[0])


'The scraped dataframe contains 350719 rows'

In [5]:
# Setting all the text columns to lower case
for col in [
    "tags_top",
    "title",
    "body",
    "tags_bottom",
    "body_token_final",
    "title_token_final",
]:
    df_scraped[col] = df_scraped[col].str.lower()


In [6]:
# Adjusting video duration for some cases
df_scraped.iloc[348394, 7] = 8
df_scraped.iloc[347534, 7] = 24
df_scraped.iloc[342323, 7] = 51
df_scraped.iloc[337016, 7] = 181


In [50]:
# Dictiornary Russian to English used in the following charts but also for documentation
dict_ru_en = {
    "специальная военная операция": "Special Military Operation",
    "националистическ": "Nationalist",
    "националистические": "Nationalist",
    "националистическому": "Nationalist",
    "украин": "Ukraine",
    "киев": "Ukraine",
    "мы вместе": "We are all together",
    "все для победы": "Everything for victory",
    "своих не бросаем": "Never abandon our people",
    "родина": "Motherland",
    "отчизна": "Motherland",
    "коллективный запад": "Collective West",
    "нацисты": "Nazist",
}


**Creating variabels** to find words in the news broadcast leveraging the function *find_tokens* in the notebook *"Functions to use in analysis.ipynb"*. These variables represent the *catchy slogans* as well as *words* related to Ukraine

In [60]:
# SMO: Special Military Operation
smo = "специальная военная операция"

# Ukraine
token_dict_ukraine = {
    "tags_bottom": ["украин", "киев"],
    "body_token_final": ["украин", "киев"],
    "title_token_final": ["украин", "киев"],
}

#nationalists
token_dict_nationalist = {
    "tags_bottom": ["националистическ", "националистические", "националистическому"],
    "body_token_final": [
        "националистическ",
        "националистические",
        "националистическому",
    ],
    "title_token_final": [
        "националистическ",
        "националистические",
        "националистическому",
    ],
}
# We are all together
together = "мы вместе"
# Everything for victory
victory = "все для победы|всё для победы|все для победы|всё для победы"
# Collective West
collective_west = "коллективный запад|коллективный запад|коллективному западу|коллективному западу|коллективном западе"
# Never abandond our people
people = "своих не бросаем|своих не бросаем"
motherland = {
    "tags_bottom": ["родина", "отчизна"],
    "body_token_final": ["родина", "отчизна"],
    "title_token_final": ["родина", "отчизна"],
}
nazi = {
    "tags_bottom": ["нацисты", "нацизм"],
    "body_token_final": ["нацисты", "нацизм"],
    "title_token_final": ["нацисты", "нацизм"],
}
# 10065 rows


In [14]:
def masking_df_with_word(df, slogan1, slogan2=None):
    """Filtering the scraped dataframe with one or two catchy slogans. When two slogans are selected,
    the fucnction the number of co-occurences in a same news
    Args:
    df - dataframe originating from the web scraping script e.g. df_scraped
    slogan1 - dictionary or string that represents a catchy slogan used to filter the scraped dataframe
    slogan2 - dictionary that represents a catchy slogan used to filter the scraped dataframe.
    It can be used for co-occurence with slogan1

    Returns
    df - dataframe filtered with slogan1 and optionally slogan2
    """

    # check if slogan1 is a string
    if isinstance(slogan1, str):
        df = df[df["body"].str.contains(slogan1, na=False)]

    if isinstance(slogan1, dict):
        df2 = df.copy()  # used only if there are two conditions
        df = find_tokens(df, slogan1)
        df = df[df["contain_token"] == 1]

        if slogan2 != None:
            df2 = find_tokens(df2, slogan2)
            df2 = df2[df2["contain_token"] == 1]
            df = df.merge(df2, left_index=True, right_index=True)
            df["contain_token_merge"] = df.apply(
                lambda row: 1
                if (row["contain_token_x"] == 1 and row["contain_token_y"] == 1)
                else 0,
                axis=1,
            )
            df.drop(
                [col for col in df.columns if col.endswith("_y")], inplace=True, axis=1
            )
            df = df.rename(mapper=lambda x: x.replace("_x", ""), axis=1)
            df.rename({"contain_token_merge": "contain_token"})
            df = df[df["contain_token"] == 1]

    return df


Before building the data vizualizations to answer the question of this notebook, let's understand **what is the share of news that talk about Ukraine in the last 10 years?**

In [34]:
df_ukr = masking_df_with_word(df_scraped, token_dict_ukraine)
df_ukr = df_ukr.copy()
df_ukr["date"] = pd.to_datetime(df_ukr["date"])
"Share of News featuring Ukraine since 2013 is {:.1%}".format(len(df_ukr[df_ukr.date > "2013"]) 
                                                / len(df_scraped[df_scraped.date > "2013"]))


'Share of News featuring Ukraine since 2013 is 17.8%'

In [32]:
def grouping(df, x_axis):
    """grouping the df by the chosen x_axis e.g. year_month
    Args
    df - dataframe to be grouped
    x_axis - string representing the column to be grouped by e.g. year_month
    """
    df = (
        df.groupby(x_axis)
        .agg(
            {"id_in_source_file": np.count_nonzero, "video_duration_seconds": np.nansum}
        )
        .reset_index()
        .rename({"id_in_source_file": "Count of News Articles"}, axis=1)
    )
    df["Sum of Video Duration (hours)"] = df["video_duration_seconds"] / 3600
    df.drop("video_duration_seconds", axis=1, inplace=True)
    df[x_axis] = pd.to_datetime(df[x_axis])
    return df


In [36]:
def get_chart(df, slogan1, x_axis, y_axis, date_from='2006', width_chart=800, height_chart=300, slogan2=None):
    """Build an area chart from a df, filtered for slogan1 and optionally for slogan2 for co-occurrences.
    The chart is built by providing the x_axis (e.g. year_month) which is encoded in x axis of the position channel and
    the y_axis whichi is econded in the other vertical axis (e.g. Count of News Articles Featuring)
    Args
    df - dataframe originating from the web scraping script e.g. df_scraped
    x_axis -  grouping column to be encoded in the x axis e.g. year_month
    y_axis - measure to be encoded in the y axis e.g. # Broadcasting News
    slogan1 - dictionary or string that represents a catchy slogan used to filter the scraped dataframe
    slogan2 - dictionary that represents a catchy slogan used to filter the scraped dataframe.
    It can be used for co-occurence with slogan1
    date_from - string in a date format that specify the starting period of the chart
    width_chart - float representing the width of the chart. Default=800
    height_chart - float representing the height of the chart. Default=300

    Returns
    layered chart - area chart with the above encodings and area as mark overlayed by a line chart and a scatter plot
    """
    df = masking_df_with_word(df, slogan1, slogan2)
    df = grouping(df, x_axis)
    # filtering from the input date
    df = df[df[x_axis]>=date_from]
    # formatting the axis depending on the type of grouping column
    if x_axis == "year":
        format_x = "%Y"
    elif x_axis == "year_month":
        format_x = "%Y-%m"

    area = (
        alt.Chart(df)
        .mark_area(color="#3C8689", fillOpacity=0.7)
        .encode(
            alt.X(
                x_axis,
                type="temporal",
                axis=alt.Axis(
                    labelColor="#897C57",
                    labelFont="Franklin Gothic Demi Cond",
                    labelFontSize=18,
                    grid=False,
                    ticks=True,
                    labelAngle=-45,
                    format=format_x,
                    title=None,
                ),
            ),
            alt.Y(
                y_axis,
                type="quantitative",
                axis=alt.Axis(
                    domain=False,
                    labelColor="#897C57",
                    labelFont="Franklin Gothic Demi Cond",
                    labelFontSize=18,
                    grid=False,
                    gridWidth=0.5,
                    ticks=True,
                    titleFontStyle="Franklin Gothic Demi Cond",
                    titleColor="#897C57",
                    titleFontSize=35
                    
                )
            ),
        )
    )

    # overlaying a line chart over the area chart
    line = (
        alt.Chart(df)
        .mark_line(color="#897C57")
        .encode(
            alt.X(x_axis, type="temporal"),
            alt.Y(y_axis, type="quantitative"),
        )
    )

    # overlaying a scatter plot over the area chart to highlight the points
    point = (
        alt.Chart(df)
        .mark_point(color="#897C57", filled=True, opacity=1, size=60)
        .encode(
            alt.X(x_axis, type="temporal"),
            alt.Y(y_axis, type="quantitative"),
        )
    )

    if slogan2 == None:
        if isinstance(slogan1, dict):
            title_txt = (
                y_axis
                + " featuring "
                + dict_ru_en[slogan1["tags_bottom"][0]]
                + " in Title, Tags, or Description"
            )
        # using regex to extract  until '|' e.g. victory = "все для победы|всё для победы|"
        # --> все для победы and then translated with the dictiornary dict_ru_en to
        # Everything for victory
        elif isinstance(slogan1, str):
            title_txt = (
                y_axis
                + " featuring "
                + dict_ru_en[re.match("[\w\s]+", slogan1).group()]
                + " in Title, Tags, or Description"
            )
    else:
        title_txt = (
            y_axis
            + " featuring "
            + dict_ru_en[slogan1["tags_bottom"][0]]
            + " & "
            + dict_ru_en[slogan2["tags_bottom"][0]]
            + " in Title, Tags, or Description"
        )

    chart = (area + line + point).properties(
        title={
            "text": [title_txt],
            "font": "Franklin Gothic",
            "fontSize": 24,
            "color": "#897C57",
        },
        width=width_chart,
        height=height_chart
    )

    return chart


## Frequent use of Catchy Slogans: *Special Military Operation*

In [68]:
get_chart(
    df_scraped, smo, "year_month", "Count of News Articles",
).configure_view(strokeWidth=1.2, stroke="#897C57")


In [38]:
get_chart(
    df_scraped,
    smo,
    "year_month",
    "Sum of Video Duration (hours)",
)


## Frequent use of Words: *Ukraine*

In [41]:
get_chart(
    df_scraped,
    token_dict_ukraine,
    "year_month",
    "Count of News Articles",
)


Let's see how is the trend of video duration featuring Ukraine

In [42]:
get_chart(
    df_scraped,
    token_dict_ukraine,
    "year_month",
    "Sum of Video Duration (hours)",
    width_chart=1000,
)


From the two above charts two periods stood out:
1. Before the annexation of Crimea (February 2014), the trend started to increase reaching the peak during the event (February 2014)
2. Similary, before the invasion of Ukraine (February 2022) the trend started to increase reaching the peak during the event (February 2022)

## Frequent use of Catchy Slogans and Words: *Nationalist and Ukraine*

In [43]:
get_chart(
    df_scraped,
    token_dict_nationalist,
    "year_month",
    "Count of News Articles",
    slogan2=token_dict_ukraine,
)


In [44]:
get_chart(
    df_scraped,
    token_dict_nationalist,
    "year_month",
    "Sum of Video Duration (hours)",
    slogan2=token_dict_ukraine,
)


## Frequent use of Catchy Slogans and Words: *Nazist and Ukraine*

In [45]:
get_chart(
    df_scraped,
    nazi,
    "year_month",
    "Count of News Articles",
    slogan2=token_dict_ukraine,
)


In [46]:
get_chart(
    df_scraped,
    nazi,
    "year_month",
    "Sum of Video Duration (hours)",
    slogan2=token_dict_ukraine,
)


The use of words *Nazi* along with *Ukraine* in news' title, tags or description has been used more frequently in the last years as compared to the period from 2006 to 2013. Moreover, it has been used more frequently right before the two major events: **Crimean annexation** and **Russian invasion of Ukraine**. 

## Frequent use of Catchy Slogans: *Never abandon our people*

In [47]:
get_chart(df_scraped, people, "year_month", "Count of News Articles")


In [48]:
get_chart(df_scraped, people, "year_month", "Sum of Video Duration (hours)")


## Frequent use of Catchy Slogans: *Collective West*

In [51]:
get_chart(df_scraped, collective_west, "year_month", "Count of News Articles")


In [52]:
get_chart(df_scraped, collective_west, "year_month", "Sum of Video Duration (hours)")


## Frequent use of Catchy Slogans: *Everything for Victory*

In [55]:
get_chart(df_scraped, victory, "year_month", "Count of News Articles")


In [61]:
get_chart(df_scraped, victory, "year_month", "Sum of Video Duration (hours)")


## Frequent use of Catchy Slogans: *We are all together*

In [62]:
get_chart(df_scraped, together, "year_month", "Count of News Articles")


In [63]:
get_chart(df_scraped, together, "year_month", "Sum of Video Duration (hours)")


## Frequent use of Words: *Motherland*

In [64]:
get_chart(df_scraped, motherland, "year_month", "Count of News Articles")


In [65]:
get_chart(df_scraped, motherland, "year_month", "Sum of Video Duration (hours)")


## Attitudes of Russians toward Ukraine


### Data Loading
In the next section we load the dataset from Levada center containing the survey polls about attitude of Russians toward Ukraine

In [35]:
%cd ..

c:\Users\riccardoricci\OneDrive - KPMG\Documents\GitHub\rick0701\SIADS593_Project_Russian_Propaganda


In [36]:
attitude_ukraine = pd.read_csv("Levada data set/Ukraine.csv")
attitude_ukraine.head()


Unnamed: 0.1,Unnamed: 0,Month.Year,Positive,Negative,Difficult to answer,Month,Year,Date
0,0,12.1998,79.0,14.0,7.0,12,1998,1998-12-01
1,1,12.1999,78.0,15.0,8.0,12,1999,1999-12-01
2,2,7.2,71.0,23.0,6.0,7,2000,2000-07-01
3,3,7.2001,71.0,23.0,6.0,7,2001,2001-07-01
4,4,10.2001,54.0,35.0,11.0,10,2001,2001-10-01


In [185]:
def read_clean_df_attitude(file_path):
    """Read and clean the dataframe with attitutes of Russians toward a country (e.g. Ukraine)
    Args
    file_path - string containing the path of the file with the dataset e.g. "Levada data set/Ukraine.csv"
    Returns
    df - dataframe contanining the attitudes of Russian toward a country by period of time (e.g. year-month)
    """
    df = pd.read_csv(file_path)
    df = df[df["Year"] >= 2006]
    df["year_month"] = df["Date"].str.extract("(\d{4}-\d{2})", expand=True)
    df["Positive"] = df["Positive"] / 100
    df["Negative"] = df["Negative"] / 100
    df['Date'] = pd.to_datetime(df['Date'])
    return df


In [186]:
df_attitude_ukr = read_clean_df_attitude("Levada data set/Ukraine.csv")
df_attitude_ukr.head()


Unnamed: 0.1,Unnamed: 0,Month.Year,Positive,Negative,Difficult to answer,Month,Year,Date,year_month
25,25,2.2006,0.5,0.4,10.0,2,2006,2006-02-01,2006-02
26,26,3.2006,0.53,0.37,10.0,3,2006,2006-03-01,2006-03
27,27,4.2006,0.56,0.35,10.0,4,2006,2006-04-01,2006-04
28,28,5.2006,0.55,0.35,10.0,5,2006,2006-05-01,2006-05
29,29,6.2006,0.56,0.33,11.0,6,2006,2006-06-01,2006-06


In [429]:
def get_attitude_chart(df_attitude, attitude_type, date_from='2006', width_chart=800, height_chart=300):
    """Return a chart with the trend of attitude_type (i.e. Positive or Negative) toward a country e.g. Ukraine
    Args
    df_attitude - dataframe contanining the attitudes of Russian toward a country by period of time (e.g. year-month)
    width_chart - float representing the width of the chart. Default=600

    Returns
    attitude_chart - area chart with attitude of Russians toward a country
    """
    format_x = "%Y-%m"

    df_attitude=df_attitude[df_attitude['Date']>=date_from]
    
    attitude_chart = (
        alt.Chart(df_attitude)
        .mark_area(color="#E2BA41", fillOpacity=1)
        .encode(
            alt.X(
                "Date",
                type="temporal",
                title=None,
                axis=alt.Axis(
                    labelColor="#897C57",
                    labelFont="Franklin Gothic Demi Cond",
                    labelFontSize=18,
                    grid=False,
                    ticks=True,
                    labelAngle=-45,
                    format=format_x
                ),
            ),
            alt.Y(
                attitude_type,
                type="quantitative",
                axis=alt.Axis(
                    domain=False,
                    labelColor="#897C57",
                    labelFont="Franklin Gothic Demi Cond",
                    labelFontSize=18,
                    grid=False,
                    gridWidth=0.5,
                    ticks=True,
                    title="Negative Attitude of Russians toward Ukraine",
                    titleFontStyle="Franklin Gothic Demi Cond",
                    titleColor="#897C57",
                    format=("%"),
                    titleFontSize=35
                ),
            ),
        )
    ).properties(
        width=width_chart,
        height=height_chart,
        title={
        "text": ["Negative Attitude of Russians towrad Ukraine, source: Levada Center"],
        "font": "Franklin Gothic",
        "fontSize": 24,
        "color": "#897C57",
    }
    )

    return attitude_chart


In [430]:
# ch1: Chart1 Attitude of Russians toward Ukraine
ch1 = get_attitude_chart(df_attitude_ukr, "Negative", '2011').properties(
    
)
ch1


In [431]:
# ch2: Chart2 Video Duration featuring Ukraine

ch2 = get_chart(
    df_scraped,
    token_dict_ukraine,
    "year_month",
    "Sum of Video Duration (hours)",
    '2011',
)
ch2


In [433]:
(ch1 + ch2).resolve_scale(y='independent')

## Comparing *Negative Attitude of Russians toward Ukraine* vs *Monthly Video Durating Featuring Ukraine*

In [434]:
width_ch=800
height_ch=300

In [435]:
# creating a dummy dataframe for annotations in the final chart

annotations = pd.DataFrame(
    {
        "year_month": ["2014-07", "2022-07"],
        "y_axis_annotations": [0, 0],
        "annotations": ["Crimean annexation", "Invasion of Ukraine"],
    }
)
annotations


Unnamed: 0,year_month,y_axis_annotations,annotations
0,2014-07,0,Crimean annexation
1,2022-07,0,Invasion of Ukraine


In [462]:
# ch3: Chart3 Annotations

ch3 = (
    alt.Chart(annotations)
    .mark_text(color="#897C57", dy=230, fontStyle="bold", size=14)
    .encode(
        alt.X("year_month", type="temporal"),
        alt.Y(
            "y_axis_annotations",
            type="quantitative",
            axis=alt.Axis(labels=False, title=None, ticks=False),
        ),
        alt.Text("annotations", type="nominal"),
    )
).properties(width=width_ch, height=height_ch)


In [457]:
# creating a dummy dataframe to create the legend for ATTITUDE in the final chart

legend_attitude = pd.DataFrame(
    {
        "year_month": ["2017-01"],
        "y_axis_annotations": [70],
        "annotations": ["Negative Attitude of Russians toward Ukraine"],
    }
)
legend_attitude


Unnamed: 0,year_month,y_axis_annotations,annotations
0,2017-01,70,Negative Attitude of Russians toward Ukraine


In [487]:
# ch4: Chart4 Legend Attitude

ch4 = (
    alt.Chart(legend_attitude)
    .mark_text(color="#E2BA41", size=13, fontStyle="bold", dx=-255, dy=10)
    .encode(
        alt.X("year_month", type="temporal"),
        alt.Y(
            "y_axis_annotations",
            type="quantitative",
            axis=alt.Axis(labels=False, title=None, ticks=False),
        ),
        alt.Text("annotations", type="nominal"),
    ).properties(width=width_ch, height=height_ch)
)


In [484]:
# creating a dummy dataframe to create the legend for Video Duration in the final chart

legend_duration = pd.DataFrame(
    {
        "year_month": ["2018-06"],
        "y_axis_annotations": [170],
        "annotations": [
            "Total Monthly Video Duration of News Articles Featuring UKRAINE in Title, Tags, or Description"
        ],
    }
)
legend_duration


Unnamed: 0,year_month,y_axis_annotations,annotations
0,2018-06,170,Total Monthly Video Duration of News Articles ...


In [490]:
# ch5: Chart5 Legend Video Duration

ch5 = (
    alt.Chart(legend_duration)
    .mark_text(color="#76AAAC", size=13, fontStyle="bold", opacity=1, dx=-197, dy=10)
    .encode(
        alt.X("year_month", type="temporal"),
        alt.Y(
            "y_axis_annotations",
            type="quantitative",
            axis=alt.Axis(labels=False, title=None, ticks=False),
        ),
        alt.Text("annotations", type="nominal"),
    )
)


In [491]:
# final chart comparing attitude and video duration featuring Ukraine
chart_a = (
    alt.layer(ch1, ch2, ch3, ch4, ch5)
    .configure_view(strokeWidth=1.2, stroke="#897C57")
    .properties(
        title={
            "text": [
                "Negative Attitude of Russians toward Ukraine, source: Levada Center",
                "vs",
                "Total Monthly Video Duration of News Articles Featuring UKRAINE in Title, Tags, or Description",
            ],
            "font": "Franklin Gothic",
            "fontSize": 22,
            "color": "#897C57",
        }
    )
    .resolve_scale(y="independent")
)

chart_a


Computing the correlation coefficient. First, we need to join the two dataframes

In [496]:
df_scr_ukr = masking_df_with_word(df_scraped, token_dict_ukraine)
df_scr_ukr = grouping(df_scr_ukr, 'year_month')
df_scr_ukr

Unnamed: 0,year_month,Count of News Articles,Sum of Video Duration (hours)
0,2006-09-01,43,1.887222
1,2006-10-01,50,2.731111
2,2006-11-01,49,1.941389
3,2006-12-01,49,2.280278
4,2007-01-01,44,1.822778
...,...,...,...
192,2022-09-01,735,33.128056
193,2022-10-01,790,30.088333
194,2022-11-01,789,28.842778
195,2022-12-01,711,25.822778


In [495]:
df_attitude_ukr

Unnamed: 0.1,Unnamed: 0,Month.Year,Positive,Negative,Difficult to answer,Month,Year,Date,year_month
25,25,2.2006,0.50,0.40,10.0,2,2006,2006-02-01,2006-02
26,26,3.2006,0.53,0.37,10.0,3,2006,2006-03-01,2006-03
27,27,4.2006,0.56,0.35,10.0,4,2006,2006-04-01,2006-04
28,28,5.2006,0.55,0.35,10.0,5,2006,2006-05-01,2006-05
29,29,6.2006,0.56,0.33,11.0,6,2006,2006-06-01,2006-06
...,...,...,...,...,...,...,...,...,...
121,121,2.2022,0.35,0.52,13.0,2,2022,2022-02-01,2022-02
122,122,3.2022,0.30,0.57,13.0,3,2022,2022-03-01,2022-03
123,123,5.2022,0.23,0.66,11.0,5,2022,2022-05-01,2022-05
124,124,8.2022,0.23,0.66,11.0,8,2022,2022-08-01,2022-08


In [498]:
df_merge = pd.merge(df_scr_ukr, df_attitude_ukr, left_on='year_month', right_on='Date')
df_merge

Unnamed: 0.1,year_month_x,Count of News Articles,Sum of Video Duration (hours),Unnamed: 0,Month.Year,Positive,Negative,Difficult to answer,Month,Year,Date,year_month_y
0,2006-10-01,50,2.731111,32,10.2006,0.61,0.31,9.0,10,2006,2006-10-01,2006-10
1,2006-11-01,49,1.941389,33,11.2006,0.66,0.24,9.0,11,2006,2006-11-01,2006-11
2,2006-12-01,49,2.280278,34,12.2006,0.64,0.27,9.0,12,2006,2006-12-01,2006-12
3,2007-01-01,44,1.822778,35,1.2007,0.67,0.24,8.0,1,2007,2007-01-01,2007-01
4,2007-02-01,31,2.508333,36,2.2007,0.61,0.29,11.0,2,2007,2007-02-01,2007-02
...,...,...,...,...,...,...,...,...,...,...,...,...
89,2022-02-01,737,36.992778,121,2.2022,0.35,0.52,13.0,2,2022,2022-02-01,2022-02
90,2022-03-01,1944,75.413611,122,3.2022,0.30,0.57,13.0,3,2022,2022-03-01,2022-03
91,2022-05-01,1163,39.579444,123,5.2022,0.23,0.66,11.0,5,2022,2022-05-01,2022-05
92,2022-08-01,917,32.417778,124,8.2022,0.23,0.66,11.0,8,2022,2022-08-01,2022-08


In [504]:
 from scipy import stats

In [508]:
np.correlate(stats.zscore(df_merge['Count of News Articles']), stats.zscore(df_merge['Negative']), mode='full')

array([-1.01714600e+00, -1.90263506e+00, -2.78730327e+00, -3.39241524e+00,
       -3.89823927e+00, -4.02187242e+00, -3.79752404e+00, -4.00235586e+00,
       -3.71622541e+00, -3.49112605e+00, -3.67014474e+00, -4.15016657e+00,
       -3.65024178e+00, -3.68076956e+00, -4.41834404e+00, -5.31117054e+00,
       -5.62494620e+00, -5.65634285e+00, -5.96313692e+00, -5.82587726e+00,
       -4.89359147e+00, -5.20734748e+00, -5.70848219e+00, -6.54281438e+00,
       -7.30559508e+00, -8.51902905e+00, -8.41867708e+00, -9.12612243e+00,
       -1.06393610e+01, -1.14903787e+01, -1.14514464e+01, -1.23096335e+01,
       -1.35756346e+01, -1.27574055e+01, -1.28555181e+01, -1.30632993e+01,
       -1.40009407e+01, -1.44323818e+01, -1.52759616e+01, -1.57317356e+01,
       -1.55569199e+01, -1.47370884e+01, -1.38506196e+01, -1.35270291e+01,
       -1.32070844e+01, -1.22087377e+01, -1.24452814e+01, -1.15494470e+01,
       -1.14687736e+01, -1.10395445e+01, -1.00351986e+01, -8.39166578e+00,
       -1.79607938e+00,  

## Comparing *Negative Attitude of Russians toward Ukraine* vs *Monthly Video Durating Featuring Ukraine and Nationalist*

In [284]:
ch2bis = get_chart(
    df_scraped,
    token_dict_ukraine,
    "year_month",
    "Count of News Articles",
    '2011',
    width_chart=1000,
    slogan2=token_dict_nationalist,
).properties(
    title={
        "text": [
            "Monthly Count of Video Duration (hours) Featuring Ukraine & Nationalists in Title, Tags, or Description"
        ],
        "font": "Franklin Gothic",
        "fontSize": 16,
        "color": "#897C57",
    }
)
ch2bis


In [280]:
legend_count = pd.DataFrame(
    {
        "year_month": ["2018-06"],
        "y_axis_annotations": [120],
        "annotations": [
            "Monthly Count of News Articles Featuring UKRAINE & NATIONALIST in Title, Tags, or Description"
        ],
    }
)
legend_count


Unnamed: 0,year_month,y_axis_annotations,annotations
0,2018-06,120,Monthly Count of News Articles Featuring UKRAI...


In [285]:
# ch5: Chart5 Legend Video Duration

ch5_bis = (
    alt.Chart(legend_count)
    .mark_text(color="#76AAAC", size=12, fontStyle="bold", opacity=1, dx=-440, dy=25)
    .encode(
        alt.X("year_month", type="temporal"),
        alt.Y(
            "y_axis_annotations",
            type="quantitative",
            axis=alt.Axis(labels=False, title=None, ticks=False),
        ),
        alt.Text("annotations", type="nominal"),
    )
)


In [286]:
alt.layer(ch1, ch2bis, ch3, ch4, ch5_bis).configure_view(
    strokeWidth=1.2, stroke="#897C57"
).properties(
    title={
        "text": [
            "Negative Attitude Towards Ukraine of Russian Population",
            "vs",
            "Monthly Count of News Articles Featuring UKRAINE & NATIONALIST in Title, Tags, or Description",
        ],
        "font": "Franklin Gothic",
        "fontSize": 16,
        "color": "#897C57",
    }
).resolve_scale(
    y="independent"
)


## Comparing *Negative Attitude of Russians toward Ukraine* vs *Monthly Video Durating Featuring Ukraine and Nazism*

In [53]:
ch2_tris = get_chart(
    df_scraped,
    token_dict_ukraine,
    "year_month",
    "Count of News Articles",
    width_chart=1000,
    slogan2=nazi,
).properties(
    title={
        "text": [
            "Monthly Count of News Articles Featuring UKRAINE & NAZISM in Title, Tags, or Description"
        ],
        "font": "Franklin Gothic",
        "fontSize": 16,
        "color": "#897C57",
    }
)
ch2_tris


In [54]:
legend_count = pd.DataFrame(
    {
        "year_month": ["2018-06"],
        "y_axis_annotations": [120],
        "annotations": [
            "Monthly Count of News Articles Featuring UKRAINE & NAZISM in Title, Tags, or Description"
        ],
    }
)
legend_count


Unnamed: 0,year_month,y_axis_annotations,annotations
0,2018-06,120,Monthly Count of News Articles Featuring UKRAI...


In [55]:
# ch5: Chart5 Legend Video Duration

ch5_tris = (
    alt.Chart(legend_count)
    .mark_text(color="#76AAAC", size=12, fontStyle="bold", opacity=1, dx=-455, dy=25)
    .encode(
        alt.X("year_month", type="temporal"),
        alt.Y(
            "y_axis_annotations",
            type="quantitative",
            axis=alt.Axis(labels=False, title=None, ticks=False),
        ),
        alt.Text("annotations", type="nominal"),
    )
)


In [56]:
alt.layer(ch1, ch2_tris, ch3, ch4, ch5_tris).configure_view(
    strokeWidth=1.2, stroke="#897C57"
).properties(
    title={
        "text": [
            "Negative Attitude Towards Ukraine of Russian Population",
            "vs",
            "Monthly Count of News Articles Featuring UKRAINE & NAZISM in Title, Tags, or Description",
        ],
        "font": "Franklin Gothic",
        "fontSize": 16,
        "color": "#897C57",
    }
).resolve_scale(
    y="independent"
)
