In [1]:
import os
import pandas as pd
import json

In [2]:
# Load the spreadsheet export and preview its last five rows.
website_info = pd.read_csv(filepath_or_buffer='./../data/Website - Website.csv')
website_info.tail(n=5)

Unnamed: 0,First Name,Last Name,Professional Headshot,Funny Headshot,GitHub,LinkedIn,PDF Resume,Is Job Searching,Capstone (link),Tagline,Bio,Personal site or Capstone repo,Class Website
21,William,McWilliams,x,,https://github.com/WMcWilliams15,https://www.linkedin.com/in/will-mcwilliams/,x,True,https://github.com/WMcWilliams15/NSS-Capstone,“It always seems impossible until it is done.”...,"Carpenter turned data analyst, who has an infi...",,https://nss-data-analytics-cohort-2.github.io/
22,Yasin,Hasan,x,,https://github.com/yhasan47,https://www.linkedin.com/in/yasinhasan47,,False,https://github.com/yhasan47/nfl-combine-capstone,"""I am ready to face any challenge that might b...",A Science Teacher turned into a Data Analyst.,,https://nss-data-analytics-cohort-2.github.io/
23,UrLeaka,Test,x,x,https://github.com/unewsome,https://www.linkedin.com/in/urleaka-newsome/,,,,"""Data is as data does""",This stuff is fun!' I can't believe I said tha...,,https://nss-data-science-cohort-2.github.io/
24,Matt,Test,x,x,,,,,,,,,
25,Taylor,Test,,,,,,,,"""Data does as data is""",,,


---

Filter out the teachers' placeholder rows (last name `Test`) and any rows with no first name

In [3]:
# Keep a row only if its last name is not the 'Test' placeholder
# and its first name is present.
website_info = website_info[
    (website_info['Last Name'] != 'Test') & ~website_info['First Name'].isnull()
]

In [4]:
# Preview the first five remaining rows.
website_info.head(n=5)

Unnamed: 0,First Name,Last Name,Professional Headshot,Funny Headshot,GitHub,LinkedIn,PDF Resume,Is Job Searching,Capstone (link),Tagline,Bio,Personal site or Capstone repo,Class Website
0,Adam,Tsuchiyama,X,,https://github.com/adamtsuch21,https://www.linkedin.com/in/adam-tsuchiyama/,x,True,https://github.com/adamtsuch21/nss_capstone,"""In God we trust. All others, bring data."" - W...",Whether it's determining how to incorporate th...,https://github.com/adamtsuch21/nss_capstone,https://nss-data-analytics-cohort-2.github.io/
1,Alan,Brunlinger,x,X,https://github.com/abrunlinger,https://www.linkedin.com/in/alan-brunlinger/,X,True,https://github.com/abrunlinger/Quarantine-Life...,"“Any fool can make things complicated, it requ...",I have always had a passion for deciphering da...,,https://nss-data-analytics-cohort-2.github.io/
2,Ann,Rumsey,x,,https://github.com/annrinTN,https://www.linkedin.com/in/ann-rumsey,x,False,https://github.com/annrinTN/capstone_annrumsey,Connecting the dots between people and data,Voracious curiosity compels me to study patter...,https://github.com/annrinTN/annrinTN.github.io,https://nss-data-analytics-cohort-2.github.io/
3,Brandes,Moore,x,X,https://github.com/brandesmoore/,https://www.linkedin.com/in/brandesmoore,x,True,https://github.com/brandesmoore/capstone_track...,"""Every day I wake up is another chance to get ...",My data journey has led me to question everyth...,,https://nss-data-analytics-cohort-2.github.io/
4,Brant,Ivey,x,X,https://github.com/BrantIvey,https://www.linkedin.com/in/brant-ivey/,x,True,https://github.com/BrantIvey/NSS_Capstone,“People...operate with beliefs and biases. To ...,"When someone asks a question, I want to know m...",https://github.com/BrantIvey/NSS_Capstone,https://nss-data-analytics-cohort-2.github.io/


---

Rename the columns to the key names the site's JavaScript ingests, and drop every column that is not mapped

In [5]:
# Spreadsheet header -> key name used for that field in the exported records.
column_map = {
    'First Name': 'firstName',
    'Last Name': 'lastName',
    'Tagline': 'reelThemIn',
    'Bio': 'bio',
    'GitHub': 'github',
    'LinkedIn': 'linkedin',
    'Is Job Searching': 'job_searching',
    'Personal site or Capstone repo': 'portfolio'
}

# Rename the mapped headers, then keep only those columns, in the order
# declared above. `list(...)` hands pandas an explicit list of labels instead
# of a dict view, and `.copy()` makes the result an independent frame, so the
# column assignments that follow do not write into a selection of another
# frame (which can trigger pandas' chained-assignment warning).
website_info = website_info.rename(columns=column_map)
website_info = website_info[list(column_map.values())].copy()

website_info.columns

Index(['firstName', 'lastName', 'reelThemIn', 'bio', 'github', 'linkedin',
       'job_searching', 'portfolio'],
      dtype='object')

---

Wrap the tagline and bio text in HTML paragraph (`<p>`) tags

In [6]:
def html_paragraph(v):
    """Wrap a value in ``<p>...</p>`` tags.

    Missing values (as judged by ``pd.isna``) yield an empty string; any
    other value is converted with ``str`` before being wrapped.
    """
    return "" if pd.isna(v) else "<p>" + str(v) + "</p>"


# Turn the tagline and the bio into HTML paragraphs, element by element.
website_info['reelThemIn'] = website_info['reelThemIn'].map(html_paragraph)
website_info['bio'] = website_info['bio'].map(html_paragraph)

---

Replace curly quotes, curly apostrophes, and em dashes with plain ASCII characters

In [7]:
# Normalise typographic punctuation to plain ASCII in every text column:
# curly double quotes -> ", curly apostrophe -> ', em dash -> -.
for col in website_info.columns:
    try:
        # regex=False: the patterns are literal characters, not regular expressions.
        website_info[col] = (
            website_info[col]
            .str.replace('“', '"', regex=False)
            .str.replace('”', '"', regex=False)
            .str.replace("’", "'", regex=False)
            .str.replace("—", "-", regex=False)
        )
    except AttributeError:
        # pandas raises AttributeError when `.str` is used on a column that
        # does not hold strings; those columns are deliberately left as-is.
        # Any other error is unexpected and is allowed to surface.
        continue

---

Gather the image paths

In [8]:
def image_path(name, idx):
    """Return the relative path of a resized headshot image.

    The file name is the lower-cased ``name`` immediately followed by
    ``idx`` and the ``.jpeg`` extension.
    """
    filename = "{}{}.jpeg".format(name.lower(), idx)
    return "../assets/img/resized/" + filename


# Index 1 is the professional headshot, index 2 the funny one.
website_info['proImg'] = website_info['firstName'].map(lambda first: image_path(first, idx=1))
website_info['funImg'] = website_info['firstName'].map(lambda first: image_path(first, idx=2))

---

Build the resume paths (left empty when no matching PDF exists on disk)

In [9]:
def resume_path(name):
    """Return the relative path of the resume PDF for ``name``, if it exists.

    The path is built from the lower-cased ``name``; ``None`` is returned
    when no file is found at that path (resolved against the current
    working directory).
    """
    candidate = "../assets/resume/" + name.lower() + ".pdf"
    return candidate if os.path.exists(candidate) else None


# Look up each person's resume PDF; entries without a file come back as None.
website_info['resume'] = website_info['firstName'].map(resume_path)
website_info['resume']

0                               None
1                               None
2                               None
3       ../assets/resume/brandes.pdf
4         ../assets/resume/brant.pdf
5     ../assets/resume/christina.pdf
6         ../assets/resume/david.pdf
7                               None
8          ../assets/resume/erin.pdf
9        ../assets/resume/fitsum.pdf
10                              None
11                              None
12       ../assets/resume/justin.pdf
13                              None
14       ../assets/resume/landry.pdf
15                              None
16                              None
17                              None
18                              None
19        ../assets/resume/tracy.pdf
20                              None
21                              None
22                              None
Name: resume, dtype: object

---

Assign IDs (row index + 1)

In [10]:
# IDs are 1-based: the frame's index label plus one.
website_info['id'] = 1 + website_info.index

---

Take a peek

In [11]:
# Show the first finished record.
website_info.head(n=1)

Unnamed: 0,firstName,lastName,reelThemIn,bio,github,linkedin,job_searching,portfolio,proImg,funImg,resume,id
0,Adam,Tsuchiyama,"<p>""In God we trust. All others, bring data."" ...",<p>Whether it's determining how to incorporate...,https://github.com/adamtsuch21,https://www.linkedin.com/in/adam-tsuchiyama/,True,https://github.com/adamtsuch21/nss_capstone,../assets/img/resized/adam1.jpeg,../assets/img/resized/adam2.jpeg,,1


In [12]:
# Write one JSON object per row (orient='records') to the data directory.
website_info.to_json(path_or_buf="./../data/cohort.json", orient='records')