# INSTALLATIONS
1. Install Pandas
2. Install Numpy

In [2]:
import pandas as pd
import numpy as np

## Pandas
When working with tabular data, such as data stored in spreadsheets or databases, pandas is the right tool for you. pandas will help you to explore, clean, and process your data. In pandas, a data table is called a DataFrame.

![finallpandas.png](attachment:finallpandas.png)

### What is a Dataframe?
##### Two-dimensional, size-mutable, potentially heterogeneous tabular data.
Dataframe is a main object in pandas. It is used to represent data with Rows and Columns. Vizualize data as stored in a excel spreadsheet.

pandas.DataFrame(data=None, index=None, columns=None, dtype=None, copy=None) <br>
https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html

Resource- https://pandas.pydata.org/pandas-docs/stable/getting_started/index.html#getting-started <br>
YouTube Tutorial- https://www.youtube.com/watch?v=vmEHCJofslg

### 1. Creating a DataFrame.
https://pandas.pydata.org/docs/reference/frame.html

In [3]:
## 1. Creating a DataFrame.
data = {'name': ['Jay', 'Ron', 'joe', 'Ross'],
        'age': ['5', '10', '7', '6'],
        'score': [9.4, 8.9, 7.2, 9]} 

df = pd.DataFrame(data)

## Parameters ##
## Data : ndarray (structured or homogeneous), Iterable, dict, or DataFrame
## index: The index (row labels) of the DataFrame.
## columns: The column labels of the DataFrame.
## dtype
## copy

df.to_csv('C:\\Users\\jayzo\\PycharmProjects\\DataZen_DataAnalysis\\text_csv', index= False)
print(df)

OSError: Cannot save file into a non-existent directory: 'C:\Users\jayzo\PycharmProjects\DataZen_DataAnalysis'

### 2. Reading Data from a CSV file
https://pandas.pydata.org/docs/reference/io.html

In [None]:
df1 = pd.read_csv('C:\\Users\\jayzo\\PycharmProjects\\DataZen_DataAnalysis\\text_csv')
df1

Unnamed: 0,name,age,score
0,Jay,5,9.4
1,Ron,10,8.9
2,joe,7,7.2
3,Ross,6,9.0


### 3. Acessing and Manipulating data of a dataframe
https://pandas.pydata.org/Pandas_Cheat_Sheet.pdf

In [None]:
df = pd.read_csv('C:\\Users\\jayzo\\PycharmProjects\\DataZen_DataAnalysis\\TataSteel_HistoricData.csv')
df

Unnamed: 0,Date,series,OPEN,HIGH,LOW,PREV. CLOSE,ltp,close,vwap,52W H,52W L,VOLUME,VALUE,No of trades
0,25-Aug-2022,EQ,107.05,107.9,105.75,106.55,106.5,106.25,106.98,1476.0,97.15,42186335,4513082704.7,172570
1,24-Aug-2022,EQ,107.55,108.15,105.75,107.6,106.3,106.55,106.69,1476.0,97.15,55686404,5941287674.1,247974
2,23-Aug-2022,EQ,104.2,108.25,103.5,105.05,107.9,107.6,106.67,1476.0,97.15,70531639,7523872406.95,262215
3,22-Aug-2022,EQ,109.5,109.6,104.7,110.05,105.2,105.05,106.79,1476.0,97.15,74037526,7906795316.15,402941
4,19-Aug-2022,EQ,112.55,112.9,109.55,112.55,109.75,110.05,111.08,1534.5,97.15,57134813,6346785640.6,227022
5,18-Aug-2022,EQ,112.5,113.0,111.4,112.5,112.7,112.55,112.33,1534.5,97.15,49396425,5548515767.75,179401
6,17-Aug-2022,EQ,113.5,113.5,112.25,113.0,112.4,112.5,112.85,1534.5,97.15,54008422,6094999817.65,207669
7,16-Aug-2022,EQ,113.7,113.7,111.1,112.65,113.0,113.0,112.55,1534.5,97.15,69207437,7789100716.7,238391
8,12-Aug-2022,EQ,109.35,112.9,108.8,109.1,112.8,112.65,111.88,1534.5,97.15,95942333,10733610778.1,307921
9,11-Aug-2022,EQ,110.3,110.65,108.6,109.2,109.0,109.1,109.49,1534.5,97.15,75321498,8247181079.65,215699


In [None]:
rows, columns = df.shape
print('rows',rows)
print('columns',columns)

rows 22
columns 14


In [None]:
## Head prints first n number of rows
print(df.head(5))

## Head prints first n number of columns
print(df.tail(3))

         Date  series    OPEN    HIGH     LOW   PREV. CLOSE     ltp   close   \
0  25-Aug-2022      EQ  107.05  107.90  105.75        106.55  106.50  106.25   
1  24-Aug-2022      EQ  107.55  108.15  105.75        107.60  106.30  106.55   
2  23-Aug-2022      EQ  104.20  108.25  103.50        105.05  107.90  107.60   
3  22-Aug-2022      EQ  109.50  109.60  104.70        110.05  105.20  105.05   
4  19-Aug-2022      EQ  112.55  112.90  109.55        112.55  109.75  110.05   

    vwap     52W H   52W L    VOLUME             VALUE   No of trades   
0  106.98  1,476.00   97.15  42186335  4,513,082,704.70         172570  
1  106.69  1,476.00   97.15  55686404  5,941,287,674.10         247974  
2  106.67  1,476.00   97.15  70531639  7,523,872,406.95         262215  
3  106.79  1,476.00   97.15  74037526  7,906,795,316.15         402941  
4  111.08  1,534.50   97.15  57134813  6,346,785,640.60         227022  
          Date  series   OPEN    HIGH     LOW   PREV. CLOSE     ltp   close   \
1

In [None]:
## Slicing a Dataframe
print(df.iloc[0])
print(df.iloc[0,0:3])

Date       25-Aug-2022
series              EQ
OPEN            107.05
Name: 0, dtype: object


In [None]:
## Descibe
df.describe()

Unnamed: 0,OPEN,HIGH,LOW,PREV. CLOSE,ltp,close,vwap,52W L,VOLUME,No of trades
count,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0,22.0
mean,223.440909,226.802273,220.354545,261.654545,224.1,223.936364,223.885455,196.675,67109350.0,278619.136364
std,296.659817,301.352059,293.152926,332.84643,298.381841,297.974469,297.816316,256.359876,38845430.0,134171.081903
min,98.1,102.0,97.15,100.35,100.2,100.35,99.48,97.15,5255902.0,144783.0
25%,107.5125,108.3,105.75,107.2375,107.1375,107.0875,106.8625,97.15,50549420.0,205755.0
50%,108.675,109.75,106.6,108.675,108.075,107.975,108.135,97.15,63182110.0,237183.5
75%,112.5375,112.975,110.7125,112.625,112.625,112.5375,112.2175,97.15,75000500.0,278565.0
max,972.0,976.7,946.05,960.7,964.7,960.7,959.86,827.0,166959900.0,628262.0


## Numpy
NumPy is the fundamental package for scientific computing in Python. It is a Python library that provides a multidimensional array object, various derived objects (such as masked arrays and matrices), and an assortment of routines for fast operations on arrays, including mathematical, logical, shape manipulation, sorting, selecting, I/O, discrete Fourier transforms, basic linear algebra, basic statistical operations, random simulation and much more.

![image.png](attachment:image.png)

https://numpy.org/doc/stable/user/absolute_beginners.html

### Getting started
Github Resources- https://github.com/KeithGalli/NumPy/blob/master/NumPy%20Tutorial.ipynb

In [None]:
arr1 = np.array([[9.0,8.0,7.0,5.0,9.0],[6.0,5.0,4.0,7.8,6.9]])
print(arr1)

arr2 = np.array([[9.0,8.0,7.0,5.0,9.0,4.0,3.2,7.9],[6.0,5.0,4.0]], dtype =object)
print(arr2)

[[9.  8.  7.  5.  9. ]
 [6.  5.  4.  7.8 6.9]]
[list([9.0, 8.0, 7.0, 5.0, 9.0, 4.0, 3.2, 7.9]) list([6.0, 5.0, 4.0])]


In [None]:
# Get Dimension
print('Dimension of Array 1-',arr1.ndim)
print('Dimension of Array 2-',arr2.ndim)

# Get Shape
print('Shape of Array 1-',arr1.shape)
print('Shape of Array 2-',arr2.shape)

# Get Type
print('Type of Array 1-',arr1.dtype)
print('Type of Array 2-',arr2.dtype)

# Get Size
print('Size ',arr1.itemsize)

# Get number of elements
print('number of elements ',arr1.size)

Dimension of Array 1- 2
Dimension of Array 2- 1
Shape of Array 1- (2, 5)
Shape of Array 2- 1
Type of Array 1- float64
Type of Array 2- 1
Size  8
number of elements  10


### Acessing and Manipulating Arrays

In [None]:
# Get a specific element [r, c]
print(arr1[0, 2])

# Get a specific row 
print(arr1[0, :])

# Get a specific column
print(arr1[:, 2])

# Getting a little more fancy [startindex:endindex:stepsize]
print(arr1[0, 1:-1:2])

7.0
[9. 8. 7. 5. 9.]
[7. 4.]
[8. 5.]


### Initializing Different Types of Arrays

In [None]:
# All 0s matrix
print(np.zeros((2,3)))
print('*'*25)
# All 1s matrix
print(np.ones((4,2,2), dtype='int32'))
print('*'*25)

# Any other number
print(np.full((2,3,2), 99, dtype='int32'))
print('*'*25)

[[0. 0. 0.]
 [0. 0. 0.]]
*************************
[[[1 1]
  [1 1]]

 [[1 1]
  [1 1]]

 [[1 1]
  [1 1]]

 [[1 1]
  [1 1]]]
*************************
[[[99 99]
  [99 99]
  [99 99]]

 [[99 99]
  [99 99]
  [99 99]]]
*************************


In [None]:
# Random decimal numbers
print(np.random.rand(4,2))

# Random Integer values
print(np.random.randint(-4,8, size=(3,3)))

# The identity matrix
print(np.identity(5))

NameError: name 'np' is not defined