In [None]:
# Pandas
# Pandas is a fast, powerful, flexible, and easy-to-use open-source data analysis and
# manipulation tool, built on top of the Python programming language.

# Pandas is the backbone of most data science projects

# Pandas is an open-source Python library providing high-performance,
# easy-to-use data structures and data analysis tools for the Python
# programming language. Python with Pandas is used in a wide range of
# fields including academic and commercial domains such as finance,
# economics, statistics, analytics, etc.

# In 2008, Wes McKinney began developing Pandas because he needed
# a high-performance, flexible tool for data analysis.

# Prior to Pandas, Python was mainly used for data munging and preparation;
# it contributed very little to data analysis itself.
# Pandas solved this problem. Using Pandas, we can accomplish the five typical
# steps in the processing and analysis of data, regardless of the
# origin of the data: load, prepare, manipulate, model, and analyze.

# Key Features of Pandas

# a.) Tools for loading data into in-memory data objects from different file formats
# b.) Data alignment and integrated handling of missing data
# c.) Label-based slicing, indexing, and subsetting of large datasets
# d.) Columns from a data structure can be deleted or inserted
# e.) Grouping of data for aggregation and transformations
# f.) High-performance merging and joining of data
# g.) Time Series functionality

# NumPy vs Pandas

# a.) The Pandas module mainly works with tabular data, whereas the NumPy module works with
# numerical data.

# b.) Pandas provides powerful data structures, DataFrame and Series, that are mainly used
# for analyzing data, whereas NumPy offers a powerful N-dimensional array object (ndarray).

# c.) Pandas appears to be more widely adopted: it is mentioned in
# 73 company stacks and 46 developer stacks, whereas NumPy is mentioned in
# 62 company stacks and 32 developer stacks.

# d.) Pandas has better performance for datasets with a large number of rows,
# whereas NumPy performs better for smaller datasets (50K rows or less).


In [2]:
import numpy as np
import pandas as pd
# Show the installed pandas version so the outputs below can be tied to a specific release.
print(pd.__version__)

2.2.2


In [3]:
# Build a one-dimensional NumPy array, then show its contents followed by its type.
arr = np.array([1, 2, 5, 8])
print(arr, type(arr), sep="\n")

[1 2 5 8]
<class 'numpy.ndarray'>


In [4]:
# Wrap the NumPy array in a pandas Series. No index is passed, so pandas
# assigns the default integer labels 0..n-1.
# NOTE(review): the variable name mirrors the class name `pd.Series`; a lowercase
# name would be less confusing, but it is kept in case later cells rely on it.
Series = pd.Series(data=arr)
print(type(Series), Series, sep="\n")

<class 'pandas.core.series.Series'>
0    1
1    2
2    5
3    8
dtype: int64


In [5]:
# A Series with custom string labels instead of the default integer index.
age = pd.Series(data=[67, 76, 12], index=["age1", "age2", "age3"])
print(age)

# Two label-based lookups, then a position-based lookup of the first element.
print(age["age2"], age["age3"], age.iloc[0], sep="\n")

age1    67
age2    76
age3    12
dtype: int64
76
12
67


In [6]:
# Five values labelled val1..val5.
s = pd.Series(
    data=[10, 20, 30, 40, 50],
    index=["val1", "val2", "val3", "val4", "val5"],
)
print(s)

# The same element reached first by position, then by label.
print(s.iloc[2], s["val3"], sep="\n")

# Slicing by label includes the end label ('val4').
print(s["val1":"val4"])

val1    10
val2    20
val3    30
val4    40
val5    50
dtype: int64
30
30
val1    10
val2    20
val3    30
val4    40
dtype: int64


In [7]:
# Iterating over a Series yields its values (not the index labels).
print("\nusing for loop")
for value in s:
    print(value)


using for loop
10
20
30
40
50


In [8]:
# Arithmetic on a Series is applied element-wise and the index labels are kept.
print("\nPerforming Numerical Computation")
print(s.mul(2))


Performing Numerical Computation
val1     20
val2     40
val3     60
val4     80
val5    100
dtype: int64


In [9]:
# Recreate the labelled Series so this cell can run on its own.
s = pd.Series(
    data=[10, 20, 30, 40, 50],
    index=["val1", "val2", "val3", "val4", "val5"],
)

# Inspect the basic attributes: underlying values, shape, index labels, and dtype.
print(s.values, s.shape, s.index, s.dtype, sep="\n")

[10 20 30 40 50]
(5,)
Index(['val1', 'val2', 'val3', 'val4', 'val5'], dtype='object')
int64
