Using Pandas for Data Manipulation and Analysing

Creating a DataFrame

In [10]:
import pandas as pd

#Creating a dictionary
data = {
    'Coin': ['Bitcoin', 'Ethereum', 'Ripple', 'Litecoin'], 
    'Price': [45000, 3000, 0.84, 120],
    'Volume': [15000, 'nan', 200000, 50000]
}

df = pd.DataFrame(data)
df

Unnamed: 0,Coin,Price,Volume
0,Bitcoin,45000.0,15000.0
1,Ethereum,3000.0,
2,Ripple,0.84,200000.0
3,Litecoin,120.0,50000.0


Exploring the Data

In [3]:
df.head(2)

Unnamed: 0,Coin,Price,Volume
0,Bitcoin,45000.0,15000
1,Ethereum,3000.0,8000


In [4]:
df.tail(2)

Unnamed: 0,Coin,Price,Volume
2,Ripple,0.84,200000
3,Litecoin,120.0,50000


In [None]:
#To get basic information
print(df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Coin    4 non-null      object 
 1   Price   4 non-null      float64
 2   Volume  4 non-null      object 
dtypes: float64(1), object(2)
memory usage: 228.0+ bytes
None


Basic Data Manipulation

In [14]:
print(df[['Coin', 'Price']])

       Coin     Price
0   Bitcoin  45000.00
1  Ethereum   3000.00
2    Ripple      0.84
3  Litecoin    120.00


Filtering Rows

In [16]:
print(df[df['Price'] > 100])

       Coin    Price Volume
0   Bitcoin  45000.0  15000
1  Ethereum   3000.0    nan
3  Litecoin    120.0  50000


Sorting Data

In [None]:
print(df.sort_values('Price')) #sorting in ascending order

       Coin     Price  Volume
2    Ripple      0.84  200000
3  Litecoin    120.00   50000
1  Ethereum   3000.00     nan
0   Bitcoin  45000.00   15000


In [20]:
print(df.sort_values('Coin', ascending = False))

       Coin     Price  Volume
2    Ripple      0.84  200000
3  Litecoin    120.00   50000
1  Ethereum   3000.00     nan
0   Bitcoin  45000.00   15000


Adding new column

In [21]:
df['Price_after_2'] = df['Price'] * 1.02
df

Unnamed: 0,Coin,Price,Volume,Price_after_2
0,Bitcoin,45000.0,15000.0,45900.0
1,Ethereum,3000.0,,3060.0
2,Ripple,0.84,200000.0,0.8568
3,Litecoin,120.0,50000.0,122.4


In [22]:
df.describe()

Unnamed: 0,Price,Price_after_2
count,4.0,4.0
mean,12030.21,12270.8142
std,22023.552469,22464.023519
min,0.84,0.8568
25%,90.21,92.0142
50%,1560.0,1591.2
75%,13500.0,13770.0
max,45000.0,45900.0


saving and  loading with pandas

In [24]:
df.to_csv('crypto_data.csv', index = False)

Working with Numpy

In [27]:
import numpy as np

#creating a numpy array

prices = np.array([45000, 50000, 4000, 20000])
print(prices)

[45000 50000  4000 20000]


In [29]:
print(prices.shape) #checking the shape
 
print('') 

print(prices.dtype) #toget the data type

(4,)

int64


Basic Data Manipulation

In [30]:
#To add 1000 to all prices
prices_plus_100 = prices + 1000
print(prices_plus_100) 

[46000 51000  5000 21000]


In [31]:
#To compute for 2percent increase in price

price_new = np.array([46000, 51000,  5000, 21000])
price_increase = price_new + 1.02
print(price_increase)

[46001.02 51001.02  5001.02 21001.02]


In [32]:
#filtering data
higher_prices = price_new[price_new > 1000] 

print(higher_prices)

[46000 51000  5000 21000]


Basic Statistical Method in Numpy

In [34]:
volume = np.array([500000, 300000, 200000, 100000])
print('Print the Statistics for Volume')
print('Mean:', np.mean(volume))
print('Minimum Volume:', np.min(volume))
print('Maximum Volume', np.max(volume))
print('Volatility (Standard Deviation):', np.std(volume))

Print the Statistics for Volume
Mean: 275000.0
Minimum Volume: 100000
Maximum Volume 500000
Volatility (Standard Deviation): 147901.9945774904


Working with a 2D array

In [37]:
crypto_data = np.array([
    [45000, 500],
    [30000, 28000],
    [70000, 5600]
    ])

print(crypto_data)

[[45000   500]
 [30000 28000]
 [70000  5600]]


In [None]:
#Assessing rows
crypto_data[::2]

array([[45000,   500],
       [70000,  5600]])