<h1>Modify DataFrame Data</h1>

In [19]:
import pandas as pd
# Path of the input CSV without its extension, relative to the notebook.
file = "../data/AAPL"
# Append the ".csv" extension and load the file into a DataFrame.
df = pd.read_csv(file + ".csv")

<h3>Change Columns in DataFrame</h3>

In [20]:
# Replace every column label with its lower-case form.
print(f"Column names before: {df.columns}")
lowercase_names = [name.lower() for name in df.columns]
# Assigning a list to df.columns relabels all columns at once, by position.
df.columns = lowercase_names
print(f"Column names after: {df.columns}")


Column names before: Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'], dtype='object')
Column names after: Index(['date', 'open', 'high', 'low', 'close', 'adj close', 'volume'], dtype='object')


In [21]:
# Rename selected columns through an {old name: new name} mapping.
# Columns that are not keys of the mapping keep their current names.
column_mappings = {"date": "time", "open": "price"}
# Rebind the returned frame instead of passing inplace=True: rename() returns
# a new DataFrame, which keeps the step explicit and chainable.
df = df.rename(columns=column_mappings)
print(df.head(3))

         time       price        high         low       close   adj close  \
0  2020-11-02  109.110001  110.680000  107.320000  108.769997  108.074883   
1  2020-11-03  109.660004  111.489998  108.730003  110.440002  109.734207   
2  2020-11-04  114.139999  115.589996  112.349998  114.949997  114.215378   

      volume  
0  122866900  
1  107624400  
2  138235500  


<h3>Change Rows in DataFrame</h3>

In [22]:
# Overwrite every value of the row labelled 1.
# The list supplies one value per column, in column order.
replacement_row = ["2020-11-03", 110, 111, 109, 110, 110, 107624400]
print(f"Row data before: {df.loc[1].to_list()}")
df.loc[1] = replacement_row
print(f"Row data after: {df.loc[1].to_list()}")

Row data before: ['2020-11-03', 109.660004, 111.489998, 108.730003, 110.440002, 109.734207, 107624400]
Row data after: ['2020-11-03', 110.0, 111.0, 109.0, 110.0, 110.0, 107624400]


In [23]:
# Overwrite only some columns of the row labelled 2; the other columns of
# that row are left as they are.
target_columns = ["high", "low"]
print(f"Row data before: {df.loc[2].to_dict()}")
# Values are matched to target_columns by position.
df.loc[2, target_columns] = [112, 108]
print(f"Row data after: {df.loc[2].to_dict()}")

Row data before: {'time': '2020-11-04', 'price': 114.139999, 'high': 115.589996, 'low': 112.349998, 'close': 114.949997, 'adj close': 114.215378, 'volume': 138235500}
Row data after: {'time': '2020-11-04', 'price': 114.139999, 'high': 112.0, 'low': 108.0, 'close': 114.949997, 'adj close': 114.215378, 'volume': 138235500}


In [24]:
# Update one column for every row that satisfies a condition.
# Boolean mask: True for rows whose price is below 115.
row_condition = df["price"].lt(115)
rows_before = df.loc[row_condition, ["price", "time"]]
print(f"Before: {rows_before}")
df.loc[row_condition, "price"] = 115
# The mask was computed before the assignment, so it still selects exactly
# the rows that were just modified (re-evaluating the condition would not).
rows_after = df.loc[row_condition, ["price", "time"]]
print(f"After: {rows_after}")


Before:          price        time
0   109.110001  2020-11-02
1   110.000000  2020-11-03
2   114.139999  2020-11-04
16  113.910004  2020-11-24
After:     price        time
0   115.0  2020-11-02
1   115.0  2020-11-03
2   115.0  2020-11-04
16  115.0  2020-11-24


In [25]:
# Append a derived column; assigning to a new key adds it as the last column.
print(f"Before: {df.columns} \n {df.loc[0].to_dict()}")
high_low_spread = df["high"] - df["low"]  # assumes low <= high
df["price range"] = high_low_spread
print(f"After: {df.columns} \n {df.loc[0].to_dict()}")

Before: Index(['time', 'price', 'high', 'low', 'close', 'adj close', 'volume'], dtype='object') 
 {'time': '2020-11-02', 'price': 115.0, 'high': 110.68, 'low': 107.32, 'close': 108.769997, 'adj close': 108.074883, 'volume': 122866900}
After: Index(['time', 'price', 'high', 'low', 'close', 'adj close', 'volume',
       'price range'],
      dtype='object') 
 {'time': '2020-11-02', 'price': 115.0, 'high': 110.68, 'low': 107.32, 'close': 108.769997, 'adj close': 108.074883, 'volume': 122866900, 'price range': 3.3600000000000136}


In [26]:
# Remove a column from the DataFrame in place.
column_to_drop = "price range"
first_row_before = df.loc[0].to_dict()
print(f"Before: {df.columns} \n {first_row_before}")
del df[column_to_drop]
first_row_after = df.loc[0].to_dict()
print(f"After: {df.columns} \n {first_row_after}")

Before: Index(['time', 'price', 'high', 'low', 'close', 'adj close', 'volume',
       'price range'],
      dtype='object') 
 {'time': '2020-11-02', 'price': 115.0, 'high': 110.68, 'low': 107.32, 'close': 108.769997, 'adj close': 108.074883, 'volume': 122866900, 'price range': 3.3600000000000136}
After: Index(['time', 'price', 'high', 'low', 'close', 'adj close', 'volume'], dtype='object') 
 {'time': '2020-11-02', 'price': 115.0, 'high': 110.68, 'low': 107.32, 'close': 108.769997, 'adj close': 108.074883, 'volume': 122866900}
