# Python 1 Assignment
### Purpose: To explore different ways in which data can be brought into Python via:
- Excel files
- CSV files
- Website data

In [None]:
# Bring in standard libraries and set "call" name to ease access
import pandas as pd

# Allow up to 100 columns to be shown when a dataframe is displayed
pd.options.display.max_columns = 100

### Read Excel File

In [None]:
# Open the Excel workbook with pd.ExcelFile. Despite the `df_` prefix, the result
# is a workbook handle rather than a dataframe; a worksheet is turned into a
# dataframe later by calling .parse() on it.
df_excel =  pd.ExcelFile('DataFinder Data Sample.xlsx') 

# Print the names of the worksheets contained in the workbook
print(df_excel.sheet_names)

In [None]:
# Load data from one specific worksheet of the workbook into a dataframe.
# NOTE(review): '______' looks like a fill-in-the-blank placeholder; it presumably
# has to be replaced with one of the names printed by df_excel.sheet_names.
df_datafinder = df_excel.parse('______')

# Show data in top two rows.
# NOTE(review): ___ looks like a fill-in-the-blank placeholder for the number of
# rows to show; the cell is not expected to work as intended until it is filled in.
df_datafinder.head(___)

### Read CSV File

In [None]:
# Load the Titanic passenger CSV into a dataframe: the file is decoded as
# latin1, its first line supplies the column names, and no column is used
# as the index.
df_titanic = pd.read_csv(
    'Titanic Passenger data.csv',
    header=0,
    index_col=None,
    encoding="latin1",
)

In [None]:
# Show data in top five rows.
# head() returns rows from the start of the dataframe; tail() would return
# the last rows instead, which is not what this cell is meant to show.
df_titanic.head(5)

### Read Web Data

In [None]:
# Import the standard-library module used to open URLs
import urllib.request

# Specify which URL/web page we are going to be scraping.
# NOTE(review): `_____` looks like a fill-in-the-blank placeholder for the
# variable name. It is a valid Python identifier, so the cell runs as written,
# but a descriptive name should presumably replace it both here and in the
# urlopen() call below.
_____ = "https://en.wikipedia.org/wiki/List_of_all-time_NFL_win%E2%80%93loss_records"

# Open the URL using urllib.request; `page` holds the response object returned
# by urlopen(), from which the HTML is read when the page is parsed.
page = urllib.request.urlopen(_____)

In [None]:
# Import the BeautifulSoup library so we can parse HTML and XML documents
from bs4 import BeautifulSoup

# Parse the fetched page into a BeautifulSoup parse tree using the "lxml" parser
soup = BeautifulSoup(page, "lxml")

# Find the first <table> whose class attribute is 'wikitable sortable'.
# NOTE(review): find() returns None when nothing matches, which would make the
# later row loop fail — confirm the page still uses this exact class string.
win_loss_table = soup.find('table', class_='wikitable sortable')

# Show win-loss table in HTML (as the last expression in the cell, the
# notebook displays its value)
win_loss_table

In [None]:
# Create one list per table column. The single-letter names are kept because
# the dataframe-building cell reads A through I.
A, B, C, D, E, F, G, H, I = ([] for _ in range(9))
column_lists = (A, B, C, D, E, F, G, H, I)

# We are going to ignore the headers in the <th> tags and create our own headers, but these could be imported too.
# Go through the rows in the table and put the contents in the appropriate list.
for row in win_loss_table.find_all('tr'):
    cells = row.find_all('td')

    # Only rows with exactly nine <td> cells are data rows; anything else
    # (for example a row made up of <th> header cells) is skipped.
    if len(cells) != len(column_lists):
        continue

    # Pair each cell with the list for its column position and store the
    # first text node found inside the cell.
    for values, cell in zip(column_lists, cells):
        values.append(cell.find(string=True))

In [None]:
# Build the dataframe from the scraped lists: start with the rank column, then
# attach every other column under our own header, in display order.
df_nfl = pd.DataFrame(data=A, columns=['Rank'])
other_columns = {
    'Team': B,
    'Games Played': C,
    'Games Won': D,
    'Games Lost': E,
    'Games Tied': F,
    'Percentage Won': G,
    'First NFL Season': H,
    'Team Division': I,
}
for header, values in other_columns.items():
    df_nfl[header] = values

# The scraped values arrive as objects (strings). Convert the numeric fields to
# numbers; the game-count fields have their commas removed first.
comma_formatted = {'Games Played', 'Games Won', 'Games Lost', 'Games Tied'}
numeric_headers = (
    'Rank',
    'Games Played',
    'Games Won',
    'Games Lost',
    'Games Tied',
    'Percentage Won',
    'First NFL Season',
)
for header in numeric_headers:
    raw_values = df_nfl[header]
    if header in comma_formatted:
        raw_values = raw_values.str.replace(',', '')
    df_nfl[header] = pd.to_numeric(raw_values)

# Use the rank field as the dataframe index
df_nfl.set_index('Rank', inplace=True)

# Print a summary of the dataframe's fields and their current types
df_nfl.info()

In [None]:
# Show 4 specific rows of data in the dataframe starting with row 5.
# Remember, Python starts at 0 not 1 in indexing rows.
# NOTE(review): the two ___ look like fill-in-the-blank placeholders for the
# start and stop positions of the row slice; the cell is not expected to work
# as intended until they are filled in.
df_nfl[___:___]

## Python Assignment 1
### Name: Your Name Here

In [None]:
# Display one randomly chosen row from the DataFinder dataframe
df_datafinder.sample(n=1)

In [None]:
# Display one randomly chosen row from the Titanic dataframe
df_titanic.sample(n=1)

In [None]:
# Display one randomly chosen row from the NFL win-loss dataframe
df_nfl.sample(n=1)

In [None]:
import datetime
import socket

# Print the current local date and time in ISO 8601 format
print(datetime.datetime.now().isoformat())

# Print this machine's host name
host_name = socket.gethostname()
print(f"Your Computer Name is: {host_name}")

# Print the IP address that the host name resolves to
host_address = socket.gethostbyname(host_name)
print(f"Your Computer IP Address is: {host_address}")