## Requirements

In [26]:
from distutils.filelist import findall
import pandas as pd
from pmaw import PushshiftAPI
import datetime as dt
import os
import requests
import re
from bs4 import BeautifulSoup as bs
import spacy_udpipe

## Retreiving posts

#### Load Pushshift

In [27]:
api = PushshiftAPI()

#### Search parameters


##### Specify time frame

In [28]:
start_time = int(dt.datetime.timestamp(dt.datetime.strptime('2011-02-14 00:00:00', '%Y-%m-%d %H:%M:%S'))) #time EarthPorn was created
end_time = int(dt.datetime.timestamp(dt.datetime.strptime('2022-08-01 00:00:00', '%Y-%m-%d %H:%M:%S')))
current_time = int(dt.datetime.timestamp(dt.datetime.now()))

# Create string specifying time frame that can be used for file name when saving data as csv
search_time = '20110214-20220816' 

##### Specify subreddit and search limit

In [29]:
# Set subreddit and limit
subreddit = 'EarthPorn'
limit = None

#### Query posts from pushshift using search_submissions with default parameters

Default parameters:  
max_ids_per_request = 500 (max)  
max_results_per_request = 100 (max)  
mem_safe = False -> stores responses in cache during operation if True  
safe_exit = False -> will safely exit if interupted by storing current responses and requests in the cache if True  
cache_dir -> path to cache responses in when mem_safe or safe_exit is enabled  

In [30]:
posts = api.search_submissions(subreddit=subreddit, limit=limit, before=current_time, after=start_time)
print(f'Retrieved {len(posts)} posts from Pushshift')

Retrieved 10650 posts from Pushshift


#### Create data frame for posts

In [31]:
post_list = [post for post in posts]
posts_df = pd.DataFrame(post_list)

Preview sample of posts data

In [32]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_row', 25)
print(posts_df)

       subreddit selftext author_fullname  gilded  \
0      EarthPorn              t2_oob1361x       0   
1      EarthPorn              t2_clq4t2y4       0   
2      EarthPorn              t2_oob1361x       0   
3      EarthPorn              t2_oob1361x       0   
4      EarthPorn              t2_w2pidvrr       0   
...          ...      ...             ...     ...   
10645  EarthPorn              t2_2wvv0tzz       0   
10646  EarthPorn              t2_iiexho6v       0   
10647  EarthPorn              t2_vz6y6zou       0   
10648  EarthPorn              t2_7vrj82jq       0   
10649  EarthPorn                 t2_stf5q       0   

                                                                                              title  \
0      Hierapolis &amp; Pamukkale springs near city of Denizli, Turkey | by isa_ozdere. [9840x6400]   
1                              Winter Creek Wonderland 10HOURS 4K SCREENSAVER HDR WITH NATURE SOUND   
2      Hierapolis &amp; Pamukkale springs near city of

#### Clean up data frame

##### Get list of all column names

In [33]:
posts_df.columns

Index(['subreddit', 'selftext', 'author_fullname', 'gilded', 'title',
       'link_flair_richtext', 'subreddit_name_prefixed', 'hidden', 'pwls',
       'link_flair_css_class', 'thumbnail_height', 'top_awarded_type',
       'hide_score', 'quarantine', 'link_flair_text_color', 'upvote_ratio',
       'author_flair_background_color', 'subreddit_type',
       'total_awards_received', 'media_embed', 'thumbnail_width',
       'author_flair_template_id', 'is_original_content', 'secure_media',
       'is_reddit_media_domain', 'is_meta', 'category', 'secure_media_embed',
       'link_flair_text', 'score', 'is_created_from_ads_ui', 'author_premium',
       'thumbnail', 'edited', 'author_flair_css_class',
       'author_flair_richtext', 'gildings', 'post_hint', 'content_categories',
       'is_self', 'link_flair_type', 'wls', 'removed_by_category',
       'author_flair_type', 'domain', 'allow_live_comments', 'suggested_sort',
       'url_overridden_by_dest', 'view_count', 'archived', 'no_follow',


##### Remove unwanted columns

In [34]:
posts_df.drop(columns = ['author_flair_css_class', 'author_flair_text'], inplace = True)

#posts_df.drop(columns = ['author_flair_css_class', 'author_flair_text', 'gilded', 'mod_reports', 'user_reports', 'brand_safe', 'contest_mode', 'spoiler', 'suggested_sort', 'author_flair_richtext', 'author_flair_type', 'can_mod_post', 'link_flair_richtext', 'link_flair_text_color', 'link_flair_type', 'rte_mode', 'subreddit_type', 'thumbnail_height', 'thumbnail_width', 'author_flair_background_color', 'author_flair_text_color', 'author_patreon_flair', 'gildings', 'is_robot_indexable', 'link_flair_background_color', 'send_replies', 'no_follow', 'updated_utc', 'all_awardings', 'allow_live_comments', 'author_premium', 'awarders', 'total_awards_received', 'treatment_tags', 'is_created_from_ads_ui', 'parent_whitelist_status', 'pwls', 'url_overridden_by_dest', 'whitelist_status', 'wls', 'removed_by_category', 'author_is_blocked', 'approved_at_utc', 'banned_at_utc', 'steward_reports', 'removed_by', 'poll_data', 'top_awarded_type', 'retrieved_on'], inplace = True)

##### Change column names and reorder columns

1. Create dictionary - 'old name' : 'new name'

In [35]:
column_names = {'id' : 'PostID',
                'subreddit' : 'Subreddit',
                'subreddit_id' : 'SubredditID',
                'created_utc' : 'PostTime',
                'title' : 'PostTitle',
                'author' : 'Username',
                'author_created_utc' : 'UserCreatedTime',
                'author_fullname' : 'AuthorName', 
                'domain' : 'ImageDomain',
                'full_link' : 'Link',
                'is_self' : 'IsTextPost',
                'media_embed' : 'EmbeddedMedia',
                'secure_media_embed' : 'SecureEmbeddedMedia',
                'num_comments' : 'CommentNumber', 
                'over_18' : 'NSFW',
                'permalink' : 'Permalink', 
                'score' : 'Upvotes', 
                'selftext' : 'PostText', 
                'thumbnail' : 'Thumbnail',
                'url' : 'ImageURL',
                'media' : 'Media',
                'secure_media' : 'SecureMedia',
                'stickied' : 'Stickied',
                'locked' : 'CommentsLocked',
                'post_hint' : 'PostHint',
                'preview' : 'Preview',
                'is_crosspostable' : 'IsCrosspostable',
                'is_reddit_media_domain' : 'IsRedditMediaDomain',
                'is_video' : 'IsVideo',
                'num_crossposts' : 'CrosspostsNumber', 
                'pinned' : 'Pinned',
                'crosspost_parent' : 'CrosspostParent',
                'crosspost_parent_list' : 'CrosspostParentList',
                'is_meta' : 'IsMeta',
                'is_original_content' : 'IsOriginal',
                'media_only' : 'OnlyMedia', 
                'subreddit_subscribers' : 'SubRedditSubscribers',
                'media_metadata' : 'MediaMetadata', 
                'upvote_ratio' : 'UpvoteRatio', 
                'gallery_data' : 'GalleryData', 
                'is_gallery' : 'IsGallery', 
                'author_cakeday' : 'AuthorBirthdate',
                'edited' : 'Edited', 
                'view_count' : 'ViewCount', 
                'author_id' : 'AuthorID',
                'og_description' : 'OGDescription',
                'og_title' : 'OGTitle'}

2. Rename columns using dictionary

In [36]:
posts_tidy_df = posts_df.rename(columns = column_names)
# Check to see if columns have been renamed
posts_tidy_df.columns 

Index(['Subreddit', 'PostText', 'AuthorName', 'gilded', 'PostTitle',
       'link_flair_richtext', 'subreddit_name_prefixed', 'hidden', 'pwls',
       'link_flair_css_class', 'thumbnail_height', 'top_awarded_type',
       'hide_score', 'quarantine', 'link_flair_text_color', 'UpvoteRatio',
       'author_flair_background_color', 'subreddit_type',
       'total_awards_received', 'EmbeddedMedia', 'thumbnail_width',
       'author_flair_template_id', 'IsOriginal', 'SecureMedia',
       'IsRedditMediaDomain', 'IsMeta', 'category', 'SecureEmbeddedMedia',
       'link_flair_text', 'Upvotes', 'is_created_from_ads_ui',
       'author_premium', 'Thumbnail', 'Edited', 'author_flair_richtext',
       'gildings', 'PostHint', 'content_categories', 'IsTextPost',
       'link_flair_type', 'wls', 'removed_by_category', 'author_flair_type',
       'ImageDomain', 'allow_live_comments', 'suggested_sort',
       'url_overridden_by_dest', 'ViewCount', 'archived', 'no_follow',
       'IsCrosspostable', 'Pinn

3. Reorder columns

In [37]:
#posts_tidy_df = posts_tidy_df[['Subreddit', 'SubredditID', 'PostTitle', 'PostID', 'PostTime', 'Username', 'Upvotes', 'CommentNumber', 'ImageDomain', 'ImageURL', 'UserCreatedTime', 'AuthorName', 'Permalink', 'Link', 'IsTextPost', 'PostText', 'EmbeddedMedia', 'Thumbnail', 'NSFW']]
posts_tidy_df = posts_tidy_df[['Subreddit', 'SubredditID', 'PostTitle', 'PostID', 'PostTime', 'Username', 'Upvotes', 'CommentNumber', 'ImageDomain', 'ImageURL', 'AuthorName', 'Permalink', 'IsTextPost', 'PostText', 'EmbeddedMedia', 'Thumbnail', 'NSFW']]
                                       

#posts_reordered_df = posts_renamed_df[['Subreddit', 'SubredditID', 'PostTitle', 'PostID', 'PostTime', 'Username', 'ViewCount', 'Upvotes', 'UpvoteRatio', 'CommentNumber', 'Edited', 'OGDescription', 'OGTitle', 'ImageDomain', 'ImageURL', 'Permalink', 'Link', 'IsTextPost', 'PostText', 'UserCreatedTime', 'AuthorID', 'AuthorName', 'AuthorBirthdate', 'IsVideo', 'IsMeta', 'IsOriginal', 'IsGallery', 'GalleryData', 'IsRedditMediaDomain', 'IsCrosspostable', 'CrosspostsNumber', 'CrosspostParent', 'CrosspostParentList', 'SubRedditSubscribers', 'OnlyMedia', 'MediaMetadata', 'EmbeddedMedia', 'SecureEmbeddedMedia', 'Media', 'SecureMedia', 'Thumbnail', 'Stickied', 'Pinned', 'PostHint', 'Preview', 'CommentsLocked', 'NSFW']]

Convert time stamp from UNIX to UTC

In [38]:
posts_tidy_df['PostTime'] = pd.to_datetime(posts_tidy_df['PostTime'], utc=True, unit='s')

#### Fix image URLS

In [39]:
posts_tidy_df = posts_tidy_df.reindex(columns = posts_tidy_df.columns.tolist() + ['NewURL']) #create column for fixed urls

for index, row in posts_tidy_df.iterrows():
    if row['ImageDomain'] == 'flickr.com':
        print(row['ImageURL'])
        r = requests.get(row['ImageURL'])
        soup = bs(r.content)
        images = re.findall(r'(\/\/live\.staticflickr\.com\/[0-9][0-9][0-9][0-9][0-9]\/[a-zA-Z0-9_]+\.(?:png|jpg|jpeg|gif|png|svg))', str(soup))
        
        for image in images:
            image_url = image
            break
        print(image_url)
        posts_tidy_df.at[index, 'NewURL'] = image_url
    elif row['ImageDomain'] == 'imgur.com':
        posts_tidy_df.at[index, 'NewURL'] = re.sub(r'http://imgur.com', 'http://i.imgur.com', row['ImageURL']) + '.jpg'
    elif row['ImageDomain'] == 'i.imgur.com':
        posts_tidy_df.at[index, 'NewURL'] = row['ImageURL']
    else:
        continue

https://www.flickr.com/photos/26020895@N04/52728807824/in/photostream/lightbox/
//live.staticflickr.com/65535/52728807824_13b404a60e_b.jpg
https://www.flickr.com/photos/26020895@N04/52728807824/in/photostream/lightbox/
//live.staticflickr.com/65535/52728807824_13b404a60e_b.jpg
https://www.flickr.com/photos/26020895@N04/52728807824/in/photostream/lightbox/
//live.staticflickr.com/65535/52728807824_13b404a60e_b.jpg
https://www.flickr.com/photos/26020895@N04/52728807824/in/photostream/lightbox/
//live.staticflickr.com/65535/52728807824_13b404a60e_b.jpg
https://www.flickr.com/photos/26020895@N04/52728807824/in/photostream/lightbox/
//live.staticflickr.com/65535/52728807824_13b404a60e_b.jpg
https://www.flickr.com/photos/26020895@N04/52728807824/in/photostream/lightbox/
//live.staticflickr.com/65535/52728807824_13b404a60e_b.jpg
https://www.flickr.com/photos/26020895@N04/52728807824/in/photostream/lightbox/
//live.staticflickr.com/65535/52728807824_13b404a60e_b.jpg
https://www.flickr.com/phot

## Save data frame and images

#### View data frame

In [40]:
pd.set_option('display.max_colwidth', None)
posts_tidy_df

Unnamed: 0,Subreddit,SubredditID,PostTitle,PostID,PostTime,Username,Upvotes,CommentNumber,ImageDomain,ImageURL,AuthorName,Permalink,IsTextPost,PostText,EmbeddedMedia,Thumbnail,NSFW,NewURL
0,EarthPorn,t5_2sbq3,"Hierapolis &amp; Pamukkale springs near city of Denizli, Turkey | by isa_ozdere. [9840x6400]",11jxs0o,2023-03-06 12:38:32+00:00,sankisuhana,1,0,image.arrivalguides.com,https://image.arrivalguides.com/9840x6400/05/a0d0c4ba602deaa7b1d6e1ea8365e780.jpg,t2_oob1361x,/r/EarthPorn/comments/11jxs0o/hierapolis_pamukkale_springs_near_city_of_denizli/,False,,{},https://b.thumbs.redditmedia.com/VPsgn4L8D5TOrS7fYL29IRKT_lY7gstmmB94_-o6xkI.jpg,False,
1,EarthPorn,t5_2sbq3,Winter Creek Wonderland 10HOURS 4K SCREENSAVER HDR WITH NATURE SOUND,11jxjct,2023-03-06 12:27:51+00:00,LegitimateGate6150,1,1,youtube.com,https://www.youtube.com/watch?v=xdlH7A9poqU,t2_clq4t2y4,/r/EarthPorn/comments/11jxjct/winter_creek_wonderland_10hours_4k_screensaver/,False,,"{'content': '&lt;iframe width=""356"" height=""200"" src=""https://www.youtube.com/embed/xdlH7A9poqU?feature=oembed&amp;enablejsapi=1"" frameborder=""0"" allow=""accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture; web-share"" allowfullscreen title=""Winter Creek Wonderland 10HOURS 4K SCREENSAVER HDR WITH NATURE SOUND""&gt;&lt;/iframe&gt;', 'width': 356, 'scrolling': False, 'height': 200}",https://b.thumbs.redditmedia.com/ZqDLnd3nknz_4QRutRkezFQAE-uBCcq3Y9TjKquFUTg.jpg,False,
2,EarthPorn,t5_2sbq3,"Hierapolis &amp; Pamukkale springs near city of Denizli, Turkey | by isa_ozdere. [9840x6400]",11jxfz1,2023-03-06 12:23:35+00:00,sankisuhana,1,1,i.redd.it,https://i.redd.it/ptjhmy0f04ma1.jpg,t2_oob1361x,/r/EarthPorn/comments/11jxfz1/hierapolis_pamukkale_springs_near_city_of_denizli/,False,,{},https://b.thumbs.redditmedia.com/qvAn5hE3eohjWC5oVycCmVVwekpS8WOzcvPMyC68G7g.jpg,False,
3,EarthPorn,t5_2sbq3,"Hierapolis &amp; Pamukkale springs near city of Denizli, Turkey | by isa_ozdere. [9840x6400]",11jx060,2023-03-06 12:03:29+00:00,sankisuhana,1,1,i.redd.it,https://i.redd.it/26a4pzulv3ma1.jpg,t2_oob1361x,/r/EarthPorn/comments/11jx060/hierapolis_pamukkale_springs_near_city_of_denizli/,False,,{},default,False,
4,EarthPorn,t5_2sbq3,Feeling the Magic of Madeira (OC) [1170x2080],11jwpoh,2023-03-06 11:49:50+00:00,lolaaawild,1,0,i.redd.it,https://i.redd.it/ijue8n1ye5ma1.jpg,t2_w2pidvrr,/r/EarthPorn/comments/11jwpoh/feeling_the_magic_of_madeira_oc_1170x2080/,False,,{},https://b.thumbs.redditmedia.com/5ntPcWpmcm3ndqUj7PPdjSz9MaunuqFkldnhVQZTENs.jpg,False,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10645,EarthPorn,t5_2sbq3,"Timeless Sunrise, SilaryGaon, Sikkim, [5884*3566][OC]",11jdtm9,2023-03-05 21:06:31+00:00,bondbu,1,1,i.redd.it,https://i.redd.it/heykn26b11ma1.jpg,t2_2wvv0tzz,/r/EarthPorn/comments/11jdtm9/timeless_sunrise_silarygaon_sikkim_58843566oc/,False,,{},https://a.thumbs.redditmedia.com/C8_lEld07ejZcSpf0zU_If0txDCLKBjLgbJxoSvejP8.jpg,False,
10646,EarthPorn,t5_2sbq3,The Chernobyl Disaster #youtubeshorts,11jdkbc,2023-03-05 20:57:56+00:00,Unique-Ad-4524,1,1,youtube.com,https://youtube.com/shorts/7v6LO6kQhhE?feature=share,t2_iiexho6v,/r/EarthPorn/comments/11jdkbc/the_chernobyl_disaster_youtubeshorts/,False,,{},https://b.thumbs.redditmedia.com/9MGuECIZzyVC0RkVRDhG1Rb8WL-x47YcBwH4Jri7N3c.jpg,False,
10647,EarthPorn,t5_2sbq3,Peaceful!,11jd1lu,2023-03-05 20:42:30+00:00,MoHowTo,1,1,i.redd.it,https://i.redd.it/6cs42j91x0ma1.png,t2_vz6y6zou,/r/EarthPorn/comments/11jd1lu/peaceful/,False,,{},https://b.thumbs.redditmedia.com/idx_BlT3HjhLcgUYwGmT26KG7mVFk5EACm2ERCISTBQ.jpg,False,
10648,EarthPorn,t5_2sbq3,Cold winter river - below Mt. Rainier National Park [OC] [6500x4426],11jbvob,2023-03-05 20:18:10+00:00,Glum_Plane,3,1,i.redd.it,https://i.redd.it/edtzl1o5bzla1.jpg,t2_7vrj82jq,/r/EarthPorn/comments/11jbvob/cold_winter_river_below_mt_rainier_national_park/,False,,{},https://b.thumbs.redditmedia.com/2pnjgCVxgUz--JVovxKUSJVwmKUUiU-wc-gebYdevGo.jpg,False,


#### Save data frame as CSV

In [41]:
filename = 'C:/Users/Skrubbe/Desktop/DTU_Civil/Semester 6/Fagprojekt/Projekt Github/FlickrFaves/Data/Reddit_'+ subreddit + '_' + search_time + '.csv'
posts_tidy_df.to_csv(filename, header=True, index=False, columns=list(posts_tidy_df.axes[1]))

#### Save images from URLs

In [51]:
#Save images from data frame URL column
root_folder = 'C:/Users/Skrubbe/Desktop/DTU_Civil/Semester 6/Fagprojekt/Projekt Github/FlickrFaves/Data/Reddit_EarthPorn_20110214-20220816.csv'

def download(row):
   filename = root_folder + subreddit + '_' + row['PostID'] + '.jpg'

   # create folder if it doesn't exist
   os.makedirs(os.path.dirname(filename), exist_ok = True)

   url = row.NewURL
   print(f"Downloading {url} to {filename}")
   r = requests.get(url, allow_redirects=True)
   with open(filename, 'wb') as f:
       f.write(r.content)

try:
    posts_df.apply(download, axis=1)
except:
    pass

## Text cleaning and annotating features

> this might be useful later on to create a list of features mentioned in the text for each picture

#### Remove brackets and other characters

In [43]:
posts_clean_df = posts_tidy_df.rename(columns = column_names)
posts_clean_df['PostTitle'].replace(to_replace="\[(.*?)\]", value="", regex=True, inplace=True) 
posts_clean_df['PostTitle'].replace(to_replace="\(\d*?\s*[\u00D7?x?]\s*\d*?\)", value="", regex=True, inplace=True)
posts_clean_df['PostTitle'].replace(to_replace="\(", value="", regex=True, inplace=True)
posts_clean_df['PostTitle'].replace(to_replace="\)", value="", regex=True, inplace=True)
posts_clean_df['PostTitle'].replace(to_replace="-", value="", regex=True, inplace=True)

#### Load NLP model

In [44]:
spacy_udpipe.download("en")
nlp = spacy_udpipe.load("en")

Already downloaded a model for the 'en' language


##### Create new data frame for annotations

In [45]:
column_names = ['Sentence', 'Text ID', 'IDX', 'Text', 'Lemma', 'POS', 'Form', 'Dependency', 'Sentiment'] 
posts_annotated_df = pd.DataFrame(columns=column_names)

#### Create empty lists to store token values in

In [46]:
sent = []
i = []
idx = []
word = []
lemma = []
pos = []
tag = []
dep = []
sentiment = []
form = []

#### Tokenize post titles

In [47]:
for index, row in posts_clean_df.iterrows():
    text = row['PostTitle']
    doc = nlp(text)
    for token in doc:
        #print('Sentence:' + token.sent)
        sent.append(token.sent)
        i.append(token.i)
        idx.append(token.idx)
        word.append(token.text)
        lemma.append(token.lemma_)
        pos.append(token.pos_)
        form.append(token.morph.get("VerbForm"))
        tag.append(token.tag_)
        dep.append(token.dep_)
        sentiment.append(token.sentiment)
       

#### Add token annotations to data frame

In [48]:
posts_annotated_df['Sentence'] = sent
posts_annotated_df['Text ID'] = i
posts_annotated_df['Text'] = word
posts_annotated_df['Lemma'] = lemma
posts_annotated_df['POS'] = pos
posts_annotated_df['VerbForm'] = form
posts_annotated_df['Dependency'] = dep
posts_annotated_df['IDX'] = idx
posts_annotated_df['Sentiment'] = sentiment
posts_annotated_df['VerbForm'] = posts_annotated_df['VerbForm'].str[0]

print(posts_annotated_df)

                                                                                                    Sentence  \
0      (Hierapolis, &, amp, ;, Pamukkale, springs, near, city, of, Denizli, ,, Turkey, |, by, isa_ozdere, .)   
1      (Hierapolis, &, amp, ;, Pamukkale, springs, near, city, of, Denizli, ,, Turkey, |, by, isa_ozdere, .)   
2      (Hierapolis, &, amp, ;, Pamukkale, springs, near, city, of, Denizli, ,, Turkey, |, by, isa_ozdere, .)   
3      (Hierapolis, &, amp, ;, Pamukkale, springs, near, city, of, Denizli, ,, Turkey, |, by, isa_ozdere, .)   
4      (Hierapolis, &, amp, ;, Pamukkale, springs, near, city, of, Denizli, ,, Turkey, |, by, isa_ozdere, .)   
...                                                                                                      ...   
84245                                             (Cold, winter, river, below, Mt., Rainier, National, Park)   
84246                                                                                (Garden, of, The, G

##### Save annotations as CSV

In [49]:
filename = 'C:/Users/acali/OneDrive - Danmarks Tekniske Universitet/Code/Reddit_Annotated_'+ subreddit + '_' + search_time + '.csv'
posts_annotated_df.to_csv(filename, header=True, index=False, columns=list(posts_annotated_df.axes[1]))

OSError: Cannot save file into a non-existent directory: 'C:\Users\acali\OneDrive - Danmarks Tekniske Universitet\Code'

### Find features

#### Filter for nouns

In [None]:
nouns_df = posts_annotated_df[posts_annotated_df['POS'].str.contains("NOUN|PROPNOUN")]
features_df = nouns_df[nouns_df['Dependency'].str.contains('ROOT')]

#### Create list with features

In [None]:
features_list = features_df['Text'].tolist()

#### Create data frame with features and subreddit name

In [None]:
CES_features = pd.DataFrame()
CES_features['Features'] = features_list
CES_features['Subreddit'] = subreddit

#### Save features as CSV

In [None]:
All_CES_features_updated.to_csv(features_filename, header=True, index=False, columns=list(All_CES_features_updated.axes[1]))