## *Data loading and extraction*

In [4]:
! pip install openpyxl

Collecting openpyxl
  Downloading openpyxl-3.1.2-py2.py3-none-any.whl (249 kB)
Collecting et-xmlfile
  Downloading et_xmlfile-1.1.0-py3-none-any.whl (4.7 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-1.1.0 openpyxl-3.1.2


You should consider upgrading via the 'D:\python_3.10\python.exe -m pip install --upgrade pip' command.


In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests

In [2]:
# Read Input File
input_data = pd.read_excel('Input.xlsx')

In [3]:
# Data Extraction Loop
for index, row in input_data.iterrows():
    url = row['URL']
    
    # Use web scraping to extract article text from the URL
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')
    
    # Assuming the article text is contained within a specific HTML tag (adjust as needed)
    article_element = soup.find('div', class_='td-post-content tagdiv-type') # Update with the actual tag
    
    # Check if the element was found before trying to access its text content
    if article_element:
        article_text = article_element.get_text()
    else:
        # Handle the case when the element is not found
        article_text = "No article text found"
    
    # Save the article text in a text file with the name as URL_ID
    with open(f"{row['URL_ID']}.txt", 'w', encoding='utf-8') as file:
        file.write(article_text)

## *Text/data analysis*

In [3]:
import pandas as pd
import nltk
from nltk.tokenize import word_tokenize, sent_tokenize
from nltk.corpus import stopwords

In [4]:
# Assuming your text files are named as per the URL_ID
file_names = input_data['URL_ID'].apply(lambda x: f"{x}.txt")

In [5]:
# Ensure NLTK resources are downloaded
nltk.download('punkt')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to C:\Users\Soham
[nltk_data]     Tolwala\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to C:\Users\Soham
[nltk_data]     Tolwala\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [6]:
# Read the Stop Words Lists
stop_words_auditor = set(pd.read_csv('StopWords/StopWords_Auditor.txt', header=None, squeeze=True))



  stop_words_auditor = set(pd.read_csv('StopWords/StopWords_Auditor.txt', header=None, squeeze=True))


In [7]:
# Read the StopWords_Currencies.txt file with the correct encoding and delimiter
stop_words_currencies = pd.read_csv('StopWords/StopWords_Currencies.txt', sep='|', header=None, names=['Currency', 'Country'], encoding='ISO-8859-1')

# Assuming you want to use the 'Currency' column as stop words
stop_words_currencies = set(stop_words_currencies['Currency'].str.strip())

In [8]:
stop_words_datesandnumbers = set(pd.read_csv('StopWords/StopWords_DatesandNumbers.txt', header=None, squeeze=True))
stop_words_generic = set(pd.read_csv('StopWords/StopWords_Generic.txt', header=None, squeeze=True))
stop_words_genericlong = set(pd.read_csv('StopWords/StopWords_GenericLong.txt', header=None, squeeze=True))
stop_words_geographic = set(pd.read_csv('StopWords/StopWords_Geographic.txt', header=None, squeeze=True))
stop_words_names = set(pd.read_csv('StopWords/StopWords_Names.txt', header=None, squeeze=True))



  stop_words_datesandnumbers = set(pd.read_csv('StopWords/StopWords_DatesandNumbers.txt', header=None, squeeze=True))


  stop_words_generic = set(pd.read_csv('StopWords/StopWords_Generic.txt', header=None, squeeze=True))


  stop_words_genericlong = set(pd.read_csv('StopWords/StopWords_GenericLong.txt', header=None, squeeze=True))


  stop_words_geographic = set(pd.read_csv('StopWords/StopWords_Geographic.txt', header=None, squeeze=True))


  stop_words_names = set(pd.read_csv('StopWords/StopWords_Names.txt', header=None, squeeze=True))


In [9]:
# Read the Master Dictionary
positive_words = set(pd.read_csv('MasterDictionary/positive-words.txt', header=None, squeeze=True))



  positive_words = set(pd.read_csv('MasterDictionary/positive-words.txt', header=None, squeeze=True))


In [10]:
# Read the negative-words.txt file with the correct encoding
negative_words = pd.read_csv('MasterDictionary/negative-words.txt', header=None, squeeze=True, encoding='ISO-8859-1')

# Assuming you want to use the column as a set of negative words
negative_words = set(negative_words.str.strip())



  negative_words = pd.read_csv('MasterDictionary/negative-words.txt', header=None, squeeze=True, encoding='ISO-8859-1')


## Functions to data cleaning and calculating variables

In [11]:
# Function to clean text using stop words
def clean_text(text, stop_words):
    tokens = word_tokenize(text)
    cleaned_tokens = [token.lower() for token in tokens if token.isalpha() and token.lower() not in stop_words]
    return cleaned_tokens

# Function to calculate sentiment scores
def calculate_sentiment_scores(tokens, positive_words, negative_words):
    positive_score = sum(1 for word in tokens if word in positive_words)
    negative_score = sum(1 for word in tokens if word in negative_words)
    
    # Calculate Polarity Score
    polarity_score = (positive_score - negative_score) / ((positive_score + negative_score) + 0.000001)
    
    # Calculate Subjectivity Score
    subjectivity_score = (positive_score + negative_score) / (len(tokens) + 0.000001)
    
    return positive_score, negative_score, polarity_score, subjectivity_score

# Function to calculate readability
def calculate_readability(tokens):
    num_words = len(tokens)
    num_sentences = len(sent_tokenize(' '.join(tokens)))
    avg_sentence_length = num_words / num_sentences
    
    # Assuming complex words are those with more than two syllables
    complex_words = [word for word in tokens if len(nltk.word_tokenize(word)) > 2]
    percentage_complex_words = len(complex_words) / num_words
    fog_index = 0.4 * (avg_sentence_length + percentage_complex_words)
    
    return avg_sentence_length, percentage_complex_words, fog_index

# Function to calculate average number of words per sentence
def calculate_avg_words_per_sentence(tokens):
    num_words = len(tokens)
    num_sentences = len(sent_tokenize(' '.join(tokens)))
    return num_words / num_sentences

# Function to count complex words
def count_complex_words(tokens):
    return sum(1 for word in tokens if len(nltk.word_tokenize(word)) > 2)

# Function to count syllables per word
def count_syllables_per_word(tokens):
    syllables = [len(nltk.word_tokenize(word)) for word in tokens]
    return sum(syllables) / len(tokens)

# Function to count personal pronouns
def count_personal_pronouns(text):
    personal_pronouns = ['i', 'we', 'my', 'ours', 'us']
    # Use regex to find counts of personal pronouns
    return sum(1 for pronoun in personal_pronouns if re.search(rf'\b{pronoun}\b', text, flags=re.IGNORECASE))

# Function to calculate average word length
def calculate_avg_word_length(tokens):
    total_characters = sum(len(word) for word in tokens)
    return total_characters / len(tokens)

In [12]:
# Read the extracted articles
# Assuming file_names contains the names of the text files
article_texts = []
for file_name in file_names:
    with open(file_name, 'r', encoding='utf-8') as file:
        article_texts.append(file.read())


In [13]:
# Initialize DataFrames to store the results
sentiment_df = pd.DataFrame(columns=['POSITIVE SCORE', 'NEGATIVE SCORE', 'POLARITY SCORE', 'SUBJECTIVITY SCORE'])
readability_df = pd.DataFrame(columns=['AVG SENTENCE LENGTH', 'PERCENTAGE OF COMPLEX WORDS', 'FOG INDEX'])
other_variables_df = pd.DataFrame(columns=['AVG NUMBER OF WORDS PER SENTENCE', 'COMPLEX WORD COUNT', 'WORD COUNT',
                                            'SYLLABLE PER WORD', 'PERSONAL PRONOUNS', 'AVG WORD LENGTH'])

In [14]:
all_stop_words = (
    stop_words_auditor.union(stop_words_currencies, stop_words_datesandnumbers, 
                             stop_words_generic, stop_words_genericlong, 
                             stop_words_geographic, stop_words_names)
)

In [15]:
import re

# Perform text analysis for each article
for text in article_texts:
    # Step 1: Clean the text using stop words
    cleaned_tokens = clean_text(text, all_stop_words)
    
    # Step 2: Calculate sentiment scores
    sentiment_scores = calculate_sentiment_scores(cleaned_tokens, positive_words, negative_words)
    sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
    
    # Step 3: Calculate readability
    readability_scores = calculate_readability(cleaned_tokens)
    readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
    
    # Step 4: Calculate other variables
    avg_words_per_sentence = calculate_avg_words_per_sentence(cleaned_tokens)
    complex_word_count = count_complex_words(cleaned_tokens)
    word_count = len(cleaned_tokens)
    syllable_count_per_word = count_syllables_per_word(cleaned_tokens)
    personal_pronouns = count_personal_pronouns(text)
    avg_word_length = calculate_avg_word_length(cleaned_tokens)
    
    other_variables_df = other_variables_df.append({
        'Average Number of Words Per Sentence': avg_words_per_sentence,
        'Complex Word Count': complex_word_count,
        'Word Count': word_count,
        'Syllable Count Per Word': syllable_count_per_word,
        'Personal Pronouns': personal_pronouns,
        'Average Word Length': avg_word_length
    }, ignore_index=True)


  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, 

  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df 

  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = read

  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = read

  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = read

  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, 

  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = read

  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df 

  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = read

  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = readability_df.append(dict(zip(readability_df.columns, readability_scores)), ignore_index=True)
  other_variables_df = other_variables_df.append({
  sentiment_df = sentiment_df.append(dict(zip(sentiment_df.columns, sentiment_scores)), ignore_index=True)
  readability_df = read

In [16]:
# Combine the results into a single DataFrame
output_data = pd.concat([sentiment_df, readability_df, other_variables_df], axis=1)

In [18]:
output_data.drop(['AVG NUMBER OF WORDS PER SENTENCE', 'COMPLEX WORD COUNT', 'WORD COUNT', 'SYLLABLE PER WORD', 'PERSONAL PRONOUNS', 'AVG WORD LENGTH'], axis='columns', inplace=True)

In [21]:
# converting to uppercase
columns_to_uppercase = ['Average Number of Words Per Sentence', 'Complex Word Count', 'Word Count', 'Syllable Count Per Word', 'Personal Pronouns', 'Average Word Length']

# Use the rename method to convert selected columns to uppercase
output_data.rename(columns={col: col.upper() for col in columns_to_uppercase}, inplace=True)


In [23]:
output_data.rename(columns={'AVERAGE NUMBER OF WORDS PER SENTENCE':'AVG NUMBER OF WORDS PER SENTENCE', 'AVERAGE WORD LENGTH':'AVG WORD LENGTH'}, inplace=True)

In [24]:
output_data

Unnamed: 0,POSITIVE SCORE,NEGATIVE SCORE,POLARITY SCORE,SUBJECTIVITY SCORE,AVG SENTENCE LENGTH,PERCENTAGE OF COMPLEX WORDS,FOG INDEX,AVG NUMBER OF WORDS PER SENTENCE,COMPLEX WORD COUNT,WORD COUNT,SYLLABLE COUNT PER WORD,PERSONAL PRONOUNS,AVG WORD LENGTH
0,33.0,6.0,0.692308,0.072491,538.0,0.0,215.2,538.0,0.0,538.0,1.0,3.0,6.665428
1,58.0,31.0,0.303371,0.117569,757.0,0.0,302.8,757.0,0.0,757.0,1.0,2.0,7.461030
2,38.0,24.0,0.225806,0.104377,594.0,0.0,237.6,594.0,0.0,594.0,1.0,2.0,8.308081
3,36.0,75.0,-0.351351,0.187184,593.0,0.0,237.2,593.0,0.0,593.0,1.0,1.0,8.124789
4,22.0,8.0,0.466667,0.085714,350.0,0.0,140.0,350.0,0.0,350.0,1.0,2.0,7.422857
...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,27.0,57.0,-0.357143,0.154128,545.0,0.0,218.0,545.0,0.0,545.0,1.0,3.0,7.264220
96,21.0,35.0,-0.250000,0.135922,412.0,0.0,164.8,412.0,0.0,412.0,1.0,3.0,6.740291
97,5.0,3.0,0.250000,0.037915,211.0,0.0,84.4,211.0,0.0,211.0,1.0,0.0,7.090047
98,0.0,0.0,0.000000,0.000000,3.0,0.0,1.2,3.0,0.0,3.0,1.0,0.0,5.333333


In [25]:
# Load your existing excel file
output_xlsx_df = pd.read_excel('Output Data Structure.xlsx')

In [27]:
output_xlsx_df = output_xlsx_df[['URL_ID', 'URL']]

In [32]:
# Now, you can merge the data
merged_df_2 = pd.concat([output_xlsx_df, output_data], axis='columns')

In [33]:
merged_df_2

Unnamed: 0,URL_ID,URL,POSITIVE SCORE,NEGATIVE SCORE,POLARITY SCORE,SUBJECTIVITY SCORE,AVG SENTENCE LENGTH,PERCENTAGE OF COMPLEX WORDS,FOG INDEX,AVG NUMBER OF WORDS PER SENTENCE,COMPLEX WORD COUNT,WORD COUNT,SYLLABLE COUNT PER WORD,PERSONAL PRONOUNS,AVG WORD LENGTH
0,blackassign0001,https://insights.blackcoffer.com/rising-it-cit...,33.0,6.0,0.692308,0.072491,538.0,0.0,215.2,538.0,0.0,538.0,1.0,3.0,6.665428
1,blackassign0002,https://insights.blackcoffer.com/rising-it-cit...,58.0,31.0,0.303371,0.117569,757.0,0.0,302.8,757.0,0.0,757.0,1.0,2.0,7.461030
2,blackassign0003,https://insights.blackcoffer.com/internet-dema...,38.0,24.0,0.225806,0.104377,594.0,0.0,237.6,594.0,0.0,594.0,1.0,2.0,8.308081
3,blackassign0004,https://insights.blackcoffer.com/rise-of-cyber...,36.0,75.0,-0.351351,0.187184,593.0,0.0,237.2,593.0,0.0,593.0,1.0,1.0,8.124789
4,blackassign0005,https://insights.blackcoffer.com/ott-platform-...,22.0,8.0,0.466667,0.085714,350.0,0.0,140.0,350.0,0.0,350.0,1.0,2.0,7.422857
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,blackassign0096,https://insights.blackcoffer.com/what-is-the-r...,27.0,57.0,-0.357143,0.154128,545.0,0.0,218.0,545.0,0.0,545.0,1.0,3.0,7.264220
96,blackassign0097,https://insights.blackcoffer.com/impact-of-cov...,21.0,35.0,-0.250000,0.135922,412.0,0.0,164.8,412.0,0.0,412.0,1.0,3.0,6.740291
97,blackassign0098,https://insights.blackcoffer.com/contribution-...,5.0,3.0,0.250000,0.037915,211.0,0.0,84.4,211.0,0.0,211.0,1.0,0.0,7.090047
98,blackassign0099,https://insights.blackcoffer.com/how-covid-19-...,0.0,0.0,0.000000,0.000000,3.0,0.0,1.2,3.0,0.0,3.0,1.0,0.0,5.333333


In [34]:
merged_df_2.to_excel('New_Output_File.xlsx', index=False)