In [58]:
# Reading files with open():
# open() returns a file object.
# The first argument is the file path, made up of the directory and the file name.
# The second argument is the mode; common values are 'r' for reading, 'w' for writing and 'a' for appending.
file1 = open("F:\\jupyterNotebooks\\sample.txt","r") # path - F:\\jupyterNotebooks\\sample.txt
try:
    # read() returns the file's content. The result is discarded here; assign it to a
    # variable if you want to print or reuse it later.
    file1.read()
    # The open file object also exposes information about the file.
    file1.name # the name of the file along with its path.
    file1.mode # the mode the object was opened in - r, w, a.
finally:
    # Always close the file object. Doing it in "finally" guarantees the file is closed
    # even when one of the statements above raises an exception.
    file1.close()
# Closing every file by hand gets tedious, so prefer the "with" statement. It is the better practice:
# the code in the indented block runs first, and the file is then closed automatically.
with open("F:\\jupyterNotebooks\\sample.txt", mode="r") as file2:
    file2Stuff = file2.read()
    print(file2.name)

# Outside the indented block the file is already closed, so calling file2.read() here is not possible.
file2.closed # True
# The text stored in file2Stuff, however, is still available after the block.
file2Stuff # 'This is line 1.\nThis is line 2.\nThis is line 3.'
print(file2Stuff) # print() shows the content as it is in the file, i.e. one line after another.

# readlines() returns the file's lines as a list, one element per line.
with open("F:\\jupyterNotebooks\\sample.txt", mode="r") as file3:
    file3Stuff = file3.readlines()
    print(type(file3Stuff)) # <class 'list'>
    print(file3Stuff) # ['This is line 1.\n', 'This is line 2.\n', 'This is line 3.']

    # After a read the cursor sits at the end of the file, so an immediate second read has nothing left to return.
    # seek(0) moves the cursor back to the beginning of the file so it can be read again.
    file3.seek(0)

    # readline() reads a single line and leaves the cursor at the end of that line.
    file3line1 = file3.readline() # the 1st line
    print(file3line1)
    file3line2 = file3.readline() # continues from the cursor, so this is the 2nd line
    print(file3line2)
    file3.seek(0)

    # A file object can also be iterated directly in a loop, one line per iteration.
    # Each line keeps its trailing "\n", which is why print() shows an empty row after it.
    for line in file3:
        print(line)
    file3.seek(0)

    # read(n) reads at most n characters from the file, starting at the cursor.
    # The cursor stops right after the last character read, so successive calls
    # progress through the text.
    file3char16 = file3.read(16)
    print(file3char16)
    file3char4 = file3.read(4)
    print(file3char4)

F:\jupyterNotebooks\sample.txt
This is line 1.
This is line 2.
This is line 3.
<class 'list'>
['This is line 1.\n', 'This is line 2.\n', 'This is line 3.']
This is line 1.

This is line 2.

This is line 1.

This is line 2.

This is line 3.
This is line 1.

This


In [63]:
# Writing files with open():
# Just like reading, writing works best inside a "with" statement.
# Note: mode "w" overwrites any file with the same name that already exists in the directory.
with open("F:\\jupyterNotebooks\\sampleWrite.txt", mode="w") as file1Write:
    file1Write.write("This is line 1.\n")
    file1Write.write("This is line 2.\n")

    # Every element of a list can be written to the file as well;
    # writelines() writes the elements one after another, in order.
    Lines = ["Line 1\n", "Line 2\n"]
    file1Write.writelines(Lines)

In [64]:
# Appending to files:
# Mode "a" opens the file for writing and adds new content at the end of the existing
# file, so what is already there is kept instead of being overwritten.
with open("F:\\jupyterNotebooks\\sampleWrite.txt", mode="a") as file1Append:
    file1Append.write("This is line is appended.\n")

In [65]:
# Copying files:
# Open the source for reading and the destination for writing in a single "with" statement,
# then write every line of the source to the destination.
with open("F:\\jupyterNotebooks\\sampleWrite.txt", mode="r") as readFile, \
        open("F:\\jupyterNotebooks\\sampleCopy.txt", mode="w") as writeFile:
    writeFile.writelines(readFile)

In [68]:
# Libraries:
# Dependencies or libraries are pre-written code to help solve problems. Dependencies need to be installed first, before 
# you "import" them.
import pandas as pd
# We use "as" to assign a shorter name for the library name.

In [76]:
# Loading data with Pandas:
# Pandas is a popular library for data analysis.
csvPath = "F:\\jupyterNotebooks\\dataid.csv"
# read_csv() loads the csv file into a dataframe.
df = pd.read_csv(csvPath)
type(df) # pandas.core.frame.DataFrame
# head(n) shows the first n rows of a dataframe; n defaults to 5.
df.head(5)

# An excel file can be read in the same way with the pd.read_excel() function.

pandas.core.frame.DataFrame

In [93]:
# Dataframes:
# A dataframe is made up of rows and columns, and can be built from a dictionary.
# Each key becomes a column label; each value is a list holding that column's entries, one per row.
dict1 = {
    "a": [11, 21, 31],
    "b": [21, 22, 23],
    "c": [96, 41, 37],
}
# Pass the dictionary to DataFrame() to turn it into a dataframe.
df = pd.DataFrame(data=dict1)
df.head()

# A new dataframe holding a single column is created by putting the column
# header inside double brackets after the dataframe name.
dfB = df[["b"]]
dfB.head() # dfB now contains only column "b" of df
# To extract several columns, list all of their names inside the [[ ]].

# Individual elements can be accessed with the iloc indexer, which takes integer
# positions in the form [x, y]: x - row, y - column.
df.iloc[0, 0] # first row, first column
df.iloc[1, 0] # second row, first column
df.iloc[0, 1] # first row, second column
# The loc indexer takes labels instead, so the column can be given by name.
df.loc[0, "b"]
# Dataframes can be sliced as well.
df.iloc[0:2, 1:3] # first 2 rows, 2nd and 3rd columns.
# With loc the same selection is written 0:1 for the rows, because the end label is included:
# rows 0 and 1 are selected, together with columns "b" through "c".
df.loc[0:1, "b":"c"]

Unnamed: 0,b,c
0,21,96
1,22,41


In [99]:
# Working with and saving data:
# unique() returns the distinct values found in a column.
df["b"].unique()
# A boolean condition on one column selects the subset of rows that satisfy it;
# here, the rows whose column "b" value is >= 22.
df.loc[df["b"] >= 22]

# to_csv() saves the dataframe to a csv file; make sure the file name carries the right extension.
df.to_csv("pyData.csv")

In [2]:
# Working with APIs:
# Watson APIs used in lab.