```
In Python, a Series is a one-dimensional array-like structure provided by the pandas library.
It can hold any data type.
Each element in a series is associated with an index.
```

# Creating Series

In [2]:
import pandas as pd
Lt = [10,20,30,40,50]
series = pd.Series(Lt) # creating series from list
print(series)

0    10
1    20
2    30
3    40
4    50
dtype: int64


In [3]:
Lt = [10,20,30,40,50]
series = pd.Series(Lt, index = ("a","b","c","d","e")) # changing index parameter
print(series)

# dtype represents data type of elements in the series...int64 means each integer occupies 64 bits of memory (8 bytes)

a    10
b    20
c    30
d    40
e    50
dtype: int64


In [4]:
Lt = [10,20,30,40,50]
series = pd.Series(Lt, index = ("z","x","r","d","e"), dtype = "int32") # Setting dtype to int32
print(series)

z    10
x    20
r    30
d    40
e    50
dtype: int32


In [5]:
Lt = [10,20,30,40,50]
series = pd.Series(Lt, dtype = "float")
print(series)

0    10.0
1    20.0
2    30.0
3    40.0
4    50.0
dtype: float64


In [6]:
series = pd.Series((3,4,5,6,6,)) # creating series from a tuple
print(series)

0    3
1    4
2    5
3    6
4    6
dtype: int64


In [7]:
series = pd.Series(range(11,16)) # creating series from a range
print(series)

0    11
1    12
2    13
3    14
4    15
dtype: int64


In [8]:
series = pd.Series({1:33,4:55,6:66,7:77}) # creating series from a dictionary
print(series)

1    33
4    55
6    66
7    77
dtype: int64


In [9]:
scaler = pd.Series(8)
print(scaler)

0    8
dtype: int64


In [10]:
# sr = pd.Series(8,3)
# sr

In [11]:
two = pd.Series([[3,3,4,4],[3,4,5,5],[3,4,4]]) # creating series containing lists as elements
two 

0    [3, 3, 4, 4]
1    [3, 4, 5, 5]
2       [3, 4, 4]
dtype: object

In [12]:
two.size

3

In [13]:
import pandas as pd
import numpy as np

random = pd.Series(np.random.randint(1,101,10)) # Creating a Series with random integers
random

0     2
1     9
2    48
3    68
4    69
5    97
6    40
7    14
8    21
9    23
dtype: int32

# Series Attribute

In [14]:
import pandas as pd
data = {'Math': 85, 'Science': 90, 'English': 75, 'History': 80}
series = pd.Series(data)
series

Math       85
Science    90
English    75
History    80
dtype: int64

In [15]:
series.dtype

dtype('int64')

In [16]:
series.size

4

In [17]:
series.is_unique # if any value is duplicated it will give false

True

In [18]:
series.index

Index(['Math', 'Science', 'English', 'History'], dtype='object')

In [19]:
series.shape

(4,)

In [20]:
series["Math"] # indexing

np.int64(85)

# Series Methods

In [21]:
series.head(2) 

Math       85
Science    90
dtype: int64

In [22]:
series.tail(2)

English    75
History    80
dtype: int64

In [23]:
series.sum()

np.int64(330)

In [24]:
series.mean()

np.float64(82.5)

In [25]:
series.unique()

array([85, 90, 75, 80])

In [26]:
series.sort_values()

English    75
History    80
Math       85
Science    90
dtype: int64

In [27]:
series.sort_values(ascending=False)

Science    90
Math       85
History    80
English    75
dtype: int64

# Indexing, slicing and stepping

Indexing lets you access individual elements. Slicing returns a range of values. Stepping allows you to skip values at regular intervals

In [28]:
x = np.random.randint(1,50,10)
random_ser = pd.Series(x)
random_ser

0    12
1    32
2    48
3    31
4    20
5    17
6    28
7    29
8    15
9    33
dtype: int32

In [40]:
#Indexing
random_ser[3]

np.int32(31)

In [41]:
# using .iloc[] for integer location-based indexing
random_ser.iloc[5]

np.int32(17)

In [44]:
# slicing , syntax = series[start:end]
random_ser[2:6]

2    48
3    31
4    20
5    17
dtype: int32

In [33]:
random_ser.iloc[1:4]

1    32
2    48
3    31
dtype: int32

In [34]:
random_ser.iloc[1:5]

1    32
2    48
3    31
4    20
dtype: int32

In [35]:
random_ser.iloc[1:]

1    32
2    48
3    31
4    20
5    17
6    28
7    29
8    15
9    33
dtype: int32

In [37]:
random_ser.iloc[:]

0    12
1    32
2    48
3    31
4    20
5    17
6    28
7    29
8    15
9    33
dtype: int32

In [46]:
# Stepping, syntax = series[start:end:step]
random_ser[::2]

0    12
2    48
4    20
6    28
8    15
dtype: int32

In [39]:
random_ser.iloc[::2]

0    12
2    48
4    20
6    28
8    15
dtype: int32

```
A DataFrame in pandas is a 2-D, labeled data structure that is similar to a table or spreadsheet

It consists of rows and columns, where each column can store different data types.

DataFrames are one of the most commonly used data structures in pandas because they provide a flexible way to manipulate, analyze, and work with data 
```

# Creating DataFrame

In [1]:
import pandas as pd
a = pd.Series(range(1,11))
a

0     1
1     2
2     3
3     4
4     5
5     6
6     7
7     8
8     9
9    10
dtype: int64

In [2]:
a.shape

(10,)

In [3]:
print(type(a))

<class 'pandas.core.series.Series'>


In [12]:
b = pd.DataFrame(range(1,11)) # creating DataFrame with range function
b

Unnamed: 0,0
0,1
1,2
2,3
3,4
4,5
5,6
6,7
7,8
8,9
9,10


In [6]:
b.shape # 10 rows and 1 column

(10, 1)

In [7]:
print(type(b))

<class 'pandas.core.frame.DataFrame'>


In [8]:
d = pd.DataFrame(['John',28,'New York'])
d

Unnamed: 0,0
0,John
1,28
2,New York


In [10]:
d = pd.DataFrame((3,4,5,5,6,6,9))
d
# If you pass 1-D data structure to a DataFrame in Pandas, it will treat each element as a row in a single column by default.

Unnamed: 0,0
0,3
1,4
2,5
3,5
4,6
5,6
6,9


In [13]:
data = [
    ['John',28,'New York'],
    ['Sara',22,'Los Angeles'],  # Defining data as a list of lists
    ['Mike',32,'Chicago'],
    ['Emma',25,'Houston']
]

df = pd.DataFrame(data, columns=('Name','Age','City'))
df

Unnamed: 0,Name,Age,City
0,John,28,New York
1,Sara,22,Los Angeles
2,Mike,32,Chicago
3,Emma,25,Houston


In [16]:
c = pd.DataFrame([[3,4,4,5,6],[3,4,4,5,6]], index=("a","b")) # creating dataframe with list
c
 
# when pass 2-D data structure to a pandas DataFrame, each 1-D will be treated as row

Unnamed: 0,0,1,2,3,4
a,3,4,4,5,6
b,3,4,4,5,6


In [18]:
import pandas as pd

data = {
    'Name': ['John','Sara','Mike','Emma'],
    'Age': [28,22,32,25],
    'City': ['New York','Los Angeles','Chicago','Houston']
}

df1 = pd.DataFrame(data) # creating dataframe with dictionary
df1
# here keys are treated as column names and values as data for each column

Unnamed: 0,Name,Age,City
0,John,28,New York
1,Sara,22,Los Angeles
2,Mike,32,Chicago
3,Emma,25,Houston


In [19]:
import pandas as pd
import numpy as np

random_data = np.random.randint(0,100,size=(6,3))

df2 = pd.DataFrame(random_data, columns=('A','B','C'), index= (range(2,8)))
df2 # creating a dataframe with random integers

Unnamed: 0,A,B,C
2,30,1,80
3,55,41,92
4,46,57,73
5,68,11,39
6,41,78,91
7,77,7,82


# Read CSV File

In [2]:
import pandas as pd
data = pd.read_csv(r"C:\Users\shris\OneDrive\Desktop\Python_clg\matches.csv")

In [3]:
data

Unnamed: 0,id,season,city,date,team1,team2,toss_winner,toss_decision,result,dl_applied,winner,win_by_runs,win_by_wickets,player_of_match,venue,umpire1,umpire2,umpire3
0,1,2017,Hyderabad,05-04-2017,Sunrisers Hyderabad,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Sunrisers Hyderabad,35,0,Yuvraj Singh,"Rajiv Gandhi International Stadium, Uppal",AY Dandekar,NJ Llong,
1,2,2017,Pune,06-04-2017,Mumbai Indians,Rising Pune Supergiant,Rising Pune Supergiant,field,normal,0,Rising Pune Supergiant,0,7,SPD Smith,Maharashtra Cricket Association Stadium,A Nand Kishore,S Ravi,
2,3,2017,Rajkot,07-04-2017,Gujarat Lions,Kolkata Knight Riders,Kolkata Knight Riders,field,normal,0,Kolkata Knight Riders,0,10,CA Lynn,Saurashtra Cricket Association Stadium,Nitin Menon,CK Nandan,
3,4,2017,Indore,08-04-2017,Rising Pune Supergiant,Kings XI Punjab,Kings XI Punjab,field,normal,0,Kings XI Punjab,0,6,GJ Maxwell,Holkar Cricket Stadium,AK Chaudhary,C Shamshuddin,
4,5,2017,Bangalore,08-04-2017,Royal Challengers Bangalore,Delhi Daredevils,Royal Challengers Bangalore,bat,normal,0,Royal Challengers Bangalore,15,0,KM Jadhav,M Chinnaswamy Stadium,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
631,632,2016,Raipur,22-05-2016,Delhi Daredevils,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Royal Challengers Bangalore,0,6,V Kohli,Shaheed Veer Narayan Singh International Stadium,A Nand Kishore,BNJ Oxenford,
632,633,2016,Bangalore,24-05-2016,Gujarat Lions,Royal Challengers Bangalore,Royal Challengers Bangalore,field,normal,0,Royal Challengers Bangalore,0,4,AB de Villiers,M Chinnaswamy Stadium,AK Chaudhary,HDPK Dharmasena,
633,634,2016,Delhi,25-05-2016,Sunrisers Hyderabad,Kolkata Knight Riders,Kolkata Knight Riders,field,normal,0,Sunrisers Hyderabad,22,0,MC Henriques,Feroz Shah Kotla,M Erasmus,C Shamshuddin,
634,635,2016,Delhi,27-05-2016,Gujarat Lions,Sunrisers Hyderabad,Sunrisers Hyderabad,field,normal,0,Sunrisers Hyderabad,0,4,DA Warner,Feroz Shah Kotla,M Erasmus,CK Nandan,


In [4]:
type(data)

pandas.core.frame.DataFrame

read_csv() function in Pandas is used to load data from a CSV (Common-Separated Values) file into a Pandas DataFrame. It is one of the most commonly used functions for importing data in data science.


### different ways to read files using Pandas
read_excel() - Reads data from excel file (with .xls or .xlsx extensions)

read_json() - Reads data from a JSON file (a format used to store data like a dictionary)

read_html() - Reads tables from a webpage (HTML file)

read-sql() - Reads data from a SQL database (used for storing large amounts of data in tables)