# Text Analysis

In [1]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import os
from nltk.corpus import stopwords
import string
from textstat.textstat import textstatistics
import re

In [71]:
df = pd.read_excel('Input.xlsx')

In [3]:
df.head()

Unnamed: 0,URL_ID,URL
0,37.0,https://insights.blackcoffer.com/ai-in-healthc...
1,38.0,https://insights.blackcoffer.com/what-if-the-c...
2,39.0,https://insights.blackcoffer.com/what-jobs-wil...
3,40.0,https://insights.blackcoffer.com/will-machine-...
4,41.0,https://insights.blackcoffer.com/will-ai-repla...


In [66]:
#null values
df.isnull().sum()

URL_ID    0
URL       0
dtype: int64

In [6]:
#Fetching article from urls

In [4]:
def obtain_url_text(url):
    headers={'User-Agent':'Mozilla/5.0 (Windows NT 6.3; Win 64 ; x64) Apple WeKit /537.36(KHTML , like Gecko) Chrome/80.0.3987.162 Safari/537.36'}
    article = requests.get(url,headers=headers).text
    soup = BeautifulSoup(article,'html.parser')
    paras = soup.find_all('p')
    
    lst = []
    for i in soup.find_all('p'):
        lst.append(i.text)
    return ' '.join(lst)


In [72]:
df['URL_Text'] = df['URL'].apply(obtain_url_text)

In [6]:
df.head()

Unnamed: 0,URL_ID,URL,URL_Text
0,37.0,https://insights.blackcoffer.com/ai-in-healthc...,Introduction “If anything kills over 10 millio...
1,38.0,https://insights.blackcoffer.com/what-if-the-c...,"Human minds, a fascination in itself carrying ..."
2,39.0,https://insights.blackcoffer.com/what-jobs-wil...,Introduction AI is rapidly evolving in the emp...
3,40.0,https://insights.blackcoffer.com/will-machine-...,“Anything that could give rise to smarter-than...
4,41.0,https://insights.blackcoffer.com/will-ai-repla...,“Machine intelligence is the last invention th...


In [73]:
#null values in articles
df.isnull().sum()

URL_ID      0
URL         0
URL_Text    0
dtype: int64

In [83]:
df[df['URL_Text'].values == ''].shape[0]

3

In [None]:
#There are 3 empty articles in input dataframe.

In [7]:
#Storing all stop_words in one file

merged_data = ''
data_folder = os.path.join(os.getcwd(),'StopWords')
data = []
for root,folders,files in os.walk(data_folder):
    for file in files:
        path = os.path.join(root,file)
        with open(file) as info:
            merged_data = merged_data+info.read()

In [None]:
#Cleaning using Stop Words Lists

In [8]:
def remove_stopwords_from_list(text):
    y = []
    for i in text.lower().split():
        if i not in merged_data:
            y.append(i)
    return ' '.join(y)

In [9]:
df['Cleaned_Text'] = df['URL_Text'].apply(remove_stopwords_from_list)

In [10]:
df.head()

Unnamed: 0,URL_ID,URL,URL_Text,Cleaned_Text
0,37.0,https://insights.blackcoffer.com/ai-in-healthc...,Introduction “If anything kills over 10 millio...,introduction “if kills 10 million people decad...
1,38.0,https://insights.blackcoffer.com/what-if-the-c...,"Human minds, a fascination in itself carrying ...","human minds, fascination carrying potential ti..."
2,39.0,https://insights.blackcoffer.com/what-jobs-wil...,Introduction AI is rapidly evolving in the emp...,introduction rapidly evolving employment secto...
3,40.0,https://insights.blackcoffer.com/will-machine-...,“Anything that could give rise to smarter-than...,“anything rise smarter-than-human intelligence...
4,41.0,https://insights.blackcoffer.com/will-ai-repla...,“Machine intelligence is the last invention th...,“machine intelligence invention humanity make”...


In [11]:
#Calculating Positive score
f_p = open('positive-words.txt','r',encoding='utf-8')
positive_words = f_p.read()

def positive_score(text):
    pos_score = 0
    for i in text.split():
        if i in positive_words:
            pos_score+=1
    return pos_score

In [12]:
df['Positive Score'] = df['Cleaned_Text'].apply(positive_score)

In [13]:
df.head()

Unnamed: 0,URL_ID,URL,URL_Text,Cleaned_Text,Positive Score
0,37.0,https://insights.blackcoffer.com/ai-in-healthc...,Introduction “If anything kills over 10 millio...,introduction “if kills 10 million people decad...,109
1,38.0,https://insights.blackcoffer.com/what-if-the-c...,"Human minds, a fascination in itself carrying ...","human minds, fascination carrying potential ti...",102
2,39.0,https://insights.blackcoffer.com/what-jobs-wil...,Introduction AI is rapidly evolving in the emp...,introduction rapidly evolving employment secto...,112
3,40.0,https://insights.blackcoffer.com/will-machine-...,“Anything that could give rise to smarter-than...,“anything rise smarter-than-human intelligence...,121
4,41.0,https://insights.blackcoffer.com/will-ai-repla...,“Machine intelligence is the last invention th...,“machine intelligence invention humanity make”...,118


In [15]:
#Calculating Negative score
f_n = open('negative-words.txt','r')
negative_words = f_n.read()

def negative_score(text):
    neg_score = 0
    for i in text.split():
        if i in negative_words:
            neg_score+=1
    return neg_score

In [16]:
df['Negative Score'] = df['Cleaned_Text'].apply(negative_score)

In [27]:
df.head(2)

Unnamed: 0,URL_ID,URL,URL_Text,Cleaned_Text,Negative Score,Positive Score
0,37.0,https://insights.blackcoffer.com/ai-in-healthc...,Introduction “If anything kills over 10 millio...,introduction “if kills 10 million people decad...,141,109
1,38.0,https://insights.blackcoffer.com/what-if-the-c...,"Human minds, a fascination in itself carrying ...","human minds, fascination carrying potential ti...",111,102


In [17]:
#Calculating Polarity Score
df['Polarity Score'] = (df['Positive Score'] - df['Negative Score'])/ ((df['Positive Score'] + df['Negative Score']) + 0.000001)
    

In [18]:
df.head(3)

Unnamed: 0,URL_ID,URL,URL_Text,Cleaned_Text,Positive Score,Negative Score,Polarity Score
0,37.0,https://insights.blackcoffer.com/ai-in-healthc...,Introduction “If anything kills over 10 millio...,introduction “if kills 10 million people decad...,109,141,-0.128
1,38.0,https://insights.blackcoffer.com/what-if-the-c...,"Human minds, a fascination in itself carrying ...","human minds, fascination carrying potential ti...",102,111,-0.042254
2,39.0,https://insights.blackcoffer.com/what-jobs-wil...,Introduction AI is rapidly evolving in the emp...,introduction rapidly evolving employment secto...,112,135,-0.093117


In [19]:
def total_words_after_cleaning(text):
    return len(text.split())

def total_sentences_after_cleaning(text):
    return len(text.split('.'))


In [20]:
#Calculating Subjectivity Score
df['Subjectivity Score'] = (df['Positive Score'] + df['Negative Score'])/ ( df['Cleaned_Text'].apply(total_words_after_cleaning) + 0.000001)

In [21]:
df.head(3)

Unnamed: 0,URL_ID,URL,URL_Text,Cleaned_Text,Positive Score,Negative Score,Polarity Score,Subjectivity Score
0,37.0,https://insights.blackcoffer.com/ai-in-healthc...,Introduction “If anything kills over 10 millio...,introduction “if kills 10 million people decad...,109,141,-0.128,0.253036
1,38.0,https://insights.blackcoffer.com/what-if-the-c...,"Human minds, a fascination in itself carrying ...","human minds, fascination carrying potential ti...",102,111,-0.042254,0.344103
2,39.0,https://insights.blackcoffer.com/what-jobs-wil...,Introduction AI is rapidly evolving in the emp...,introduction rapidly evolving employment secto...,112,135,-0.093117,0.288551


In [22]:
#Average Sentence Length = the number of words / the number of sentences
df['Average Sentence Length'] = df['Cleaned_Text'].apply(total_words_after_cleaning)/df['Cleaned_Text'].apply(total_sentences_after_cleaning)

In [23]:
df.head()

Unnamed: 0,URL_ID,URL,URL_Text,Cleaned_Text,Positive Score,Negative Score,Polarity Score,Subjectivity Score,Average Sentence Length
0,37.0,https://insights.blackcoffer.com/ai-in-healthc...,Introduction “If anything kills over 10 millio...,introduction “if kills 10 million people decad...,109,141,-0.128,0.253036,12.831169
1,38.0,https://insights.blackcoffer.com/what-if-the-c...,"Human minds, a fascination in itself carrying ...","human minds, fascination carrying potential ti...",102,111,-0.042254,0.344103,8.842857
2,39.0,https://insights.blackcoffer.com/what-jobs-wil...,Introduction AI is rapidly evolving in the emp...,introduction rapidly evolving employment secto...,112,135,-0.093117,0.288551,10.070588
3,40.0,https://insights.blackcoffer.com/will-machine-...,“Anything that could give rise to smarter-than...,“anything rise smarter-than-human intelligence...,121,123,-0.008197,0.363095,8.096386
4,41.0,https://insights.blackcoffer.com/will-ai-repla...,“Machine intelligence is the last invention th...,“machine intelligence invention humanity make”...,118,119,-0.004219,0.289377,10.111111


In [32]:
#Percentage of Complex words = the number of complex words / the number of words 
#Complex words are words in the text that contain more than two syllables.
#syllables: sounds of vowels

In [25]:
def total_complex_words(text):
    c_count = 0
    for word in text.split():
        if(textstatistics().syllable_count(word) > 2 ):
            c_count+=1
    return c_count

In [26]:
df['Percentage of Complex words'] = df['Cleaned_Text'].apply(total_complex_words)/ df['Cleaned_Text'].apply(total_words_after_cleaning)

In [27]:
df.head()

Unnamed: 0,URL_ID,URL,URL_Text,Cleaned_Text,Positive Score,Negative Score,Polarity Score,Subjectivity Score,Average Sentence Length,Percentage of Complex words
0,37.0,https://insights.blackcoffer.com/ai-in-healthc...,Introduction “If anything kills over 10 millio...,introduction “if kills 10 million people decad...,109,141,-0.128,0.253036,12.831169,0.357287
1,38.0,https://insights.blackcoffer.com/what-if-the-c...,"Human minds, a fascination in itself carrying ...","human minds, fascination carrying potential ti...",102,111,-0.042254,0.344103,8.842857,0.290792
2,39.0,https://insights.blackcoffer.com/what-jobs-wil...,Introduction AI is rapidly evolving in the emp...,introduction rapidly evolving employment secto...,112,135,-0.093117,0.288551,10.070588,0.371495
3,40.0,https://insights.blackcoffer.com/will-machine-...,“Anything that could give rise to smarter-than...,“anything rise smarter-than-human intelligence...,121,123,-0.008197,0.363095,8.096386,0.311012
4,41.0,https://insights.blackcoffer.com/will-ai-repla...,“Machine intelligence is the last invention th...,“machine intelligence invention humanity make”...,118,119,-0.004219,0.289377,10.111111,0.30525


In [None]:
#Fog Index = 0.4 * (Average Sentence Length + Percentage of Complex words)

In [28]:
df['Fog Index'] = 0.4 * (df['Average Sentence Length'] + df['Percentage of Complex words'])

In [29]:
df.head(3)

Unnamed: 0,URL_ID,URL,URL_Text,Cleaned_Text,Positive Score,Negative Score,Polarity Score,Subjectivity Score,Average Sentence Length,Percentage of Complex words,Fog Index
0,37.0,https://insights.blackcoffer.com/ai-in-healthc...,Introduction “If anything kills over 10 millio...,introduction “if kills 10 million people decad...,109,141,-0.128,0.253036,12.831169,0.357287,5.275383
1,38.0,https://insights.blackcoffer.com/what-if-the-c...,"Human minds, a fascination in itself carrying ...","human minds, fascination carrying potential ti...",102,111,-0.042254,0.344103,8.842857,0.290792,3.653459
2,39.0,https://insights.blackcoffer.com/what-jobs-wil...,Introduction AI is rapidly evolving in the emp...,introduction rapidly evolving employment secto...,112,135,-0.093117,0.288551,10.070588,0.371495,4.176833


In [30]:
df['Average Number of Words Per Sentence'] = df['Cleaned_Text'].apply(total_words_after_cleaning)/df['Cleaned_Text'].apply(total_sentences_after_cleaning)

In [31]:
df.head()

Unnamed: 0,URL_ID,URL,URL_Text,Cleaned_Text,Positive Score,Negative Score,Polarity Score,Subjectivity Score,Average Sentence Length,Percentage of Complex words,Fog Index,Average Number of Words Per Sentence
0,37.0,https://insights.blackcoffer.com/ai-in-healthc...,Introduction “If anything kills over 10 millio...,introduction “if kills 10 million people decad...,109,141,-0.128,0.253036,12.831169,0.357287,5.275383,12.831169
1,38.0,https://insights.blackcoffer.com/what-if-the-c...,"Human minds, a fascination in itself carrying ...","human minds, fascination carrying potential ti...",102,111,-0.042254,0.344103,8.842857,0.290792,3.653459,8.842857
2,39.0,https://insights.blackcoffer.com/what-jobs-wil...,Introduction AI is rapidly evolving in the emp...,introduction rapidly evolving employment secto...,112,135,-0.093117,0.288551,10.070588,0.371495,4.176833,10.070588
3,40.0,https://insights.blackcoffer.com/will-machine-...,“Anything that could give rise to smarter-than...,“anything rise smarter-than-human intelligence...,121,123,-0.008197,0.363095,8.096386,0.311012,3.362959,8.096386
4,41.0,https://insights.blackcoffer.com/will-ai-repla...,“Machine intelligence is the last invention th...,“machine intelligence invention humanity make”...,118,119,-0.004219,0.289377,10.111111,0.30525,4.166545,10.111111


In [32]:
df['Complex Word Count'] = df['Cleaned_Text'].apply(total_complex_words)

In [33]:
df.head(3)

Unnamed: 0,URL_ID,URL,URL_Text,Cleaned_Text,Positive Score,Negative Score,Polarity Score,Subjectivity Score,Average Sentence Length,Percentage of Complex words,Fog Index,Average Number of Words Per Sentence,Complex Word Count
0,37.0,https://insights.blackcoffer.com/ai-in-healthc...,Introduction “If anything kills over 10 millio...,introduction “if kills 10 million people decad...,109,141,-0.128,0.253036,12.831169,0.357287,5.275383,12.831169,353
1,38.0,https://insights.blackcoffer.com/what-if-the-c...,"Human minds, a fascination in itself carrying ...","human minds, fascination carrying potential ti...",102,111,-0.042254,0.344103,8.842857,0.290792,3.653459,8.842857,180
2,39.0,https://insights.blackcoffer.com/what-jobs-wil...,Introduction AI is rapidly evolving in the emp...,introduction rapidly evolving employment secto...,112,135,-0.093117,0.288551,10.070588,0.371495,4.176833,10.070588,318


In [34]:
punctuations = '''!()-[]{};:'"’“”\,<>./?@#$%^&*_~'''
def remove_nltk_stopwords_punc(text):
    y = []
    for word in text.lower().split():
        if word not in stopwords.words('english'):
            for ch in word:
                if ch in punctuations:
                    word = word.replace(ch,'')
            y.append(word)
    return ' '.join(y)



    

In [35]:
df['nltk_stopwords_cleaned_text'] = df['Cleaned_Text'].apply(remove_nltk_stopwords_punc)

In [36]:
df.head(3)

Unnamed: 0,URL_ID,URL,URL_Text,Cleaned_Text,Positive Score,Negative Score,Polarity Score,Subjectivity Score,Average Sentence Length,Percentage of Complex words,Fog Index,Average Number of Words Per Sentence,Complex Word Count,nltk_stopwords_cleaned_text
0,37.0,https://insights.blackcoffer.com/ai-in-healthc...,Introduction “If anything kills over 10 millio...,introduction “if kills 10 million people decad...,109,141,-0.128,0.253036,12.831169,0.357287,5.275383,12.831169,353,introduction if kills 10 million people decade...
1,38.0,https://insights.blackcoffer.com/what-if-the-c...,"Human minds, a fascination in itself carrying ...","human minds, fascination carrying potential ti...",102,111,-0.042254,0.344103,8.842857,0.290792,3.653459,8.842857,180,human minds fascination carrying potential tin...
2,39.0,https://insights.blackcoffer.com/what-jobs-wil...,Introduction AI is rapidly evolving in the emp...,introduction rapidly evolving employment secto...,112,135,-0.093117,0.288551,10.070588,0.371495,4.176833,10.070588,318,introduction rapidly evolving employment secto...


In [37]:
df['nltk_stopwords_cleaned_text'][0]

'introduction if kills 10 million people decades highly infectious virus war missiles microbes bill gatess remarks conference 2014 world avoided ebola outbreak new unprecedented invisible virus us overwhelmed unprepared healthcare system oblivious population health emergency demonstrated lack scientific consideration underlined alarming robust innovations health medical facilities past years artificial intelligence proven tangible potential healthcare sectors clinical practices translational medical biomedical research case detected china december 31st 2019 program developed bluedot alerted world pandemic quick realise ais ability analyse large chunks data detecting patterns identifying tracking carriers virus tracing apps tabs people infected prevent risk crossinfection algorithms track patterns extract features classify categorise them that ibm watson sophisticated works cloud computing natural language processing prominently contributed healthcare sector global level conversational 

In [112]:
df['URL_Text'][0]

'Introduction “If anything kills over 10 million people in the next few decades, it will be a highly infectious virus rather than a war. Not missiles but microbes.” Bill Gates’s remarks at a TED conference in 2014, right after the world had avoided the Ebola outbreak. When the new, unprecedented, invisible virus hit us, it met an overwhelmed and unprepared healthcare system and oblivious population. This public health emergency demonstrated our lack of scientific consideration and underlined the alarming need for robust innovations in our health and medical facilities. For the past few years, artificial intelligence has proven to be of tangible potential in the healthcare sectors, clinical practices, translational medical and biomedical research. After the first case was detected in China on December 31st 2019, it was an AI program developed by BlueDot that alerted the world about the pandemic. It was quick to realise AI’s ability to analyse large chunks of data could help in detecting

In [38]:
df['Word Count'] = df['nltk_stopwords_cleaned_text'].apply(total_words_after_cleaning)

In [39]:
df['Syllable Count Per Word'] = df['nltk_stopwords_cleaned_text'].apply(lambda x : textstatistics().syllable_count(x) )/df['nltk_stopwords_cleaned_text'].apply(total_words_after_cleaning)

In [40]:
df.head()

Unnamed: 0,URL_ID,URL,URL_Text,Cleaned_Text,Positive Score,Negative Score,Polarity Score,Subjectivity Score,Average Sentence Length,Percentage of Complex words,Fog Index,Average Number of Words Per Sentence,Complex Word Count,nltk_stopwords_cleaned_text,Word Count,Syllable Count Per Word
0,37.0,https://insights.blackcoffer.com/ai-in-healthc...,Introduction “If anything kills over 10 millio...,introduction “if kills 10 million people decad...,109,141,-0.128,0.253036,12.831169,0.357287,5.275383,12.831169,353,introduction if kills 10 million people decade...,988,2.299595
1,38.0,https://insights.blackcoffer.com/what-if-the-c...,"Human minds, a fascination in itself carrying ...","human minds, fascination carrying potential ti...",102,111,-0.042254,0.344103,8.842857,0.290792,3.653459,8.842857,180,human minds fascination carrying potential tin...,617,2.071313
2,39.0,https://insights.blackcoffer.com/what-jobs-wil...,Introduction AI is rapidly evolving in the emp...,introduction rapidly evolving employment secto...,112,135,-0.093117,0.288551,10.070588,0.371495,4.176833,10.070588,318,introduction rapidly evolving employment secto...,856,2.34229
3,40.0,https://insights.blackcoffer.com/will-machine-...,“Anything that could give rise to smarter-than...,“anything rise smarter-than-human intelligence...,121,123,-0.008197,0.363095,8.096386,0.311012,3.362959,8.096386,209,anything rise smarterthanhuman intelligence – ...,672,2.10119
4,41.0,https://insights.blackcoffer.com/will-ai-repla...,“Machine intelligence is the last invention th...,“machine intelligence invention humanity make”...,118,119,-0.004219,0.289377,10.111111,0.30525,4.166545,10.111111,250,machine intelligence invention humanity make n...,819,2.150183


In [52]:
def personal_pronouns(text):
    pronounRegex = re.compile(r'\b(I|you|he|she|it|they|me|him|her|them|we|my|ours|(?-i:us))\b',re.I)
    pronouns = pronounRegex.findall(text)
    return len(pronouns)

    

In [None]:
#What are the 10 personal pronouns?
#I, you, he, she, it, we they, me, him, her, us, and them 

In [53]:
df['Personal Pronouns'] = df['Cleaned_Text'].apply(personal_pronouns)

In [54]:
df.head(3)

Unnamed: 0,URL_ID,URL,URL_Text,Cleaned_Text,Positive Score,Negative Score,Polarity Score,Subjectivity Score,Average Sentence Length,Percentage of Complex words,Fog Index,Average Number of Words Per Sentence,Complex Word Count,nltk_stopwords_cleaned_text,Word Count,Syllable Count Per Word,Personal Pronouns,Average Word Length
0,37.0,https://insights.blackcoffer.com/ai-in-healthc...,Introduction “If anything kills over 10 millio...,introduction “if kills 10 million people decad...,109,141,-0.128,0.253036,12.831169,0.357287,5.275383,12.831169,353,introduction if kills 10 million people decade...,988,2.299595,3,0.115385
1,38.0,https://insights.blackcoffer.com/what-if-the-c...,"Human minds, a fascination in itself carrying ...","human minds, fascination carrying potential ti...",102,111,-0.042254,0.344103,8.842857,0.290792,3.653459,8.842857,180,human minds fascination carrying potential tin...,617,2.071313,18,0.184765
2,39.0,https://insights.blackcoffer.com/what-jobs-wil...,Introduction AI is rapidly evolving in the emp...,introduction rapidly evolving employment secto...,112,135,-0.093117,0.288551,10.070588,0.371495,4.176833,10.070588,318,introduction rapidly evolving employment secto...,856,2.34229,5,0.133178


In [49]:
df['Average Word Length'] = len(df['nltk_stopwords_cleaned_text'])/df['nltk_stopwords_cleaned_text'].apply(total_words_after_cleaning)

In [50]:
df.head()

Unnamed: 0,URL_ID,URL,URL_Text,Cleaned_Text,Positive Score,Negative Score,Polarity Score,Subjectivity Score,Average Sentence Length,Percentage of Complex words,Fog Index,Average Number of Words Per Sentence,Complex Word Count,nltk_stopwords_cleaned_text,Word Count,Syllable Count Per Word,Personal Pronouns,Average Word Length
0,37.0,https://insights.blackcoffer.com/ai-in-healthc...,Introduction “If anything kills over 10 millio...,introduction “if kills 10 million people decad...,109,141,-0.128,0.253036,12.831169,0.357287,5.275383,12.831169,353,introduction if kills 10 million people decade...,988,2.299595,"[us, them, it]",0.115385
1,38.0,https://insights.blackcoffer.com/what-if-the-c...,"Human minds, a fascination in itself carrying ...","human minds, fascination carrying potential ti...",102,111,-0.042254,0.344103,8.842857,0.290792,3.653459,8.842857,180,human minds fascination carrying potential tin...,617,2.071313,"[him, it, they, it, it, it, it, they, it, it, ...",0.184765
2,39.0,https://insights.blackcoffer.com/what-jobs-wil...,Introduction AI is rapidly evolving in the emp...,introduction rapidly evolving employment secto...,112,135,-0.093117,0.288551,10.070588,0.371495,4.176833,10.070588,318,introduction rapidly evolving employment secto...,856,2.34229,"[them, it, we, them, it]",0.133178
3,40.0,https://insights.blackcoffer.com/will-machine-...,“Anything that could give rise to smarter-than...,“anything rise smarter-than-human intelligence...,121,123,-0.008197,0.363095,8.096386,0.311012,3.362959,8.096386,209,anything rise smarterthanhuman intelligence – ...,672,2.10119,"[her, them]",0.169643
4,41.0,https://insights.blackcoffer.com/will-ai-repla...,“Machine intelligence is the last invention th...,“machine intelligence invention humanity make”...,118,119,-0.004219,0.289377,10.111111,0.30525,4.166545,10.111111,250,machine intelligence invention humanity make n...,819,2.150183,"[we, them, they, they, we, we, you, it]",0.139194


In [55]:
file_name = 'Dummy_Output.xlsx'
df.to_excel(file_name)

In [56]:
df.drop(['URL_Text', 'Cleaned_Text', 'nltk_stopwords_cleaned_text'], axis=1, inplace=True)

In [57]:
df.head()

Unnamed: 0,URL_ID,URL,Positive Score,Negative Score,Polarity Score,Subjectivity Score,Average Sentence Length,Percentage of Complex words,Fog Index,Average Number of Words Per Sentence,Complex Word Count,Word Count,Syllable Count Per Word,Personal Pronouns,Average Word Length
0,37.0,https://insights.blackcoffer.com/ai-in-healthc...,109,141,-0.128,0.253036,12.831169,0.357287,5.275383,12.831169,353,988,2.299595,3,0.115385
1,38.0,https://insights.blackcoffer.com/what-if-the-c...,102,111,-0.042254,0.344103,8.842857,0.290792,3.653459,8.842857,180,617,2.071313,18,0.184765
2,39.0,https://insights.blackcoffer.com/what-jobs-wil...,112,135,-0.093117,0.288551,10.070588,0.371495,4.176833,10.070588,318,856,2.34229,5,0.133178
3,40.0,https://insights.blackcoffer.com/will-machine-...,121,123,-0.008197,0.363095,8.096386,0.311012,3.362959,8.096386,209,672,2.10119,2,0.169643
4,41.0,https://insights.blackcoffer.com/will-ai-repla...,118,119,-0.004219,0.289377,10.111111,0.30525,4.166545,10.111111,250,819,2.150183,8,0.139194


In [58]:
df.to_excel('Output Data Structure.xlsx',index=False)
    
    
        