## df columns 

In [None]:
# The .loc attribute supports slicing
# We can select all rows by using a colon followed by the column name as the second parameter. 
# Multiple columns can be included as a list to the second parameter. 
df.loc[:,['Column1', 'Column2']]

In [None]:
# .drop has two optional parameters: inplace and axis
# inplace modifies the original df, axis changes the axis to column as opposed to row which is the default.
df.drop("Name", inplace=True, axis=1) 

In [None]:
# Add a new column to the df by assigning it to some value using the indexing operator.
df['Class Ranking'] = None

## csv files

In [None]:
# Use ! prepended to use shell commands to inspect the raw csv file
!cat datasets/BTC-USD_82322.csv

In [4]:
import pandas as pd

df = pd.read_csv('datasets/BTC-USD_82322.csv')

df.head()

Unnamed: 0,Date,Close
0,07-15-2010,0.0
1,07-16-2010,0.04951
2,07-17-2010,0.08585
3,07-18-2010,0.09307
4,07-19-2010,0.08181


In [20]:
# We can change the index
df = pd.read_csv('datasets/BTC-USD_82322.csv', index_col=0)
df.head()

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
07-15-2010,0.0
07-16-2010,0.04951
07-17-2010,0.08585
07-18-2010,0.09307
07-19-2010,0.08181


In [21]:
# Add a new column
new_df['Sentiment'] = None
new_df.head()

Unnamed: 0_level_0,Closing Price,Sentiment
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
07-15-2010,0.0,
07-16-2010,0.04951,
07-17-2010,0.08585,
07-18-2010,0.09307,
07-19-2010,0.08181,


In [22]:
# If a column name doesn't change, inspect df with columns attribute
new_df.columns

Index(['Closing Price', 'Sentiment'], dtype='object')

In [23]:
# We can change column names
new_df = new_df.rename(columns={'Sentiment':'Sentiment Rating', 'Close':'Closing Price'})
new_df.head()

Unnamed: 0_level_0,Closing Price,Sentiment Rating
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
07-15-2010,0.0,
07-16-2010,0.04951,
07-17-2010,0.08585,
07-18-2010,0.09307,
07-19-2010,0.08181,


In [25]:
# We can also clean each column name when renaming to eliminate extra space characters
new_df = new_df.rename(mapper=str.strip, axis='columns')
new_df.head()

Unnamed: 0_level_0,Closing Price,Sentiment Rating
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
07-15-2010,0.0,
07-16-2010,0.04951,
07-17-2010,0.08585,
07-18-2010,0.09307,
07-19-2010,0.08181,


In [26]:
df.columns

Index(['Close'], dtype='object')

In [32]:
# Use a list to change only the columns you're interested in or use list comprehension to change all columns

# e.g. Change all of the values of the column names to lower case

# take the column index and turn it into a list
cols = list(new_df.columns)

# Then list comprehension
cols = [x.lower().strip() for x in cols]

# THen overwrite what is in the .columns attribute
new_df.columns = cols

new_df.head()

Unnamed: 0_level_0,closing price,sentiment rating
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
07-15-2010,0.0,
07-16-2010,0.04951,
07-17-2010,0.08585,
07-18-2010,0.09307,
07-19-2010,0.08181,
