In [106]:
import pandas as pd
import numpy as np

In [107]:
pd.__version__

'1.2.4'

In [108]:
a = np.arange(10)

In [109]:
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [110]:
pd.Series(a)

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int32

In [111]:
pd.Series(np.arange(5))

0    0
1    1
2    2
3    3
4    4
dtype: int32

In [112]:
pd.Series(np.arange(8), name="Testing Array")

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
Name: Testing Array, dtype: int32

In [113]:
sales = [0, 5, 20, 25, 48, 75, 79, 83, 64, 81]
sales_series = pd.Series(sales, name="Sales Data")
sales_series

0     0
1     5
2    20
3    25
4    48
5    75
6    79
7    83
8    64
9    81
Name: Sales Data, dtype: int64

In [114]:
sales_series.values

array([ 0,  5, 20, 25, 48, 75, 79, 83, 64, 81], dtype=int64)

In [115]:
sales_series.index

RangeIndex(start=0, stop=10, step=1)

In [116]:
sales_series.name

'Sales Data'

In [117]:
sales_series.dtype

dtype('int64')

In [118]:
import pandas as pd
import numpy as np

In [119]:
a = [1, 7, 3, 6]
x = pd.Series(a)
x

0    1
1    7
2    3
3    6
dtype: int64

In [120]:
print(x[0])

1


# Create Labels

### With the 'index' argument, we can name our own labels

In [121]:
y = [7, 5, 6]
z = pd.Series(y, index=['x', 'y', 'z'])
print(z)

x    7
y    5
z    6
dtype: int64


In [122]:
print(z['y'])

5


# Key/Value Objects as Series

In [123]:
# Create a simple pandas Series from a dictionary: the keys become the index labels
calories = {'day1': 420, 'day2':540, 'day3':987}
x = pd.Series(calories)
print(x)

day1    420
day2    540
day3    987
dtype: int64


In [124]:
# Create a Series using only the data from 'day1' and 'day2':
# the index argument picks which dictionary keys are kept ('day3' is left out)
calories = {'day1':420, 'day2':380, 'day3':390}
x = pd.Series(calories, index=['day1', 'day2'])
x

day1    420
day2    380
dtype: int64

In [125]:
sales

[0, 5, 20, 25, 48, 75, 79, 83, 64, 81]

In [126]:
sales_series

0     0
1     5
2    20
3    25
4    48
5    75
6    79
7    83
8    64
9    81
Name: Sales Data, dtype: int64

In [127]:
sales_series.astype(bool)

0    False
1     True
2     True
3     True
4     True
5     True
6     True
7     True
8     True
9     True
Name: Sales Data, dtype: bool

In [128]:
sales_series.astype(float)

0     0.0
1     5.0
2    20.0
3    25.0
4    48.0
5    75.0
6    79.0
7    83.0
8    64.0
9    81.0
Name: Sales Data, dtype: float64

In [129]:
# Casting to the unit-less 'datetime64' dtype is rejected by pandas.
# The error is caught and printed so the demonstration stays visible without
# stopping a Restart & Run All; newer pandas versions may raise TypeError
# instead of ValueError, so both are handled.
# The working form passes an explicit unit: sales_series.astype('datetime64[ns]')
try:
    sales_series.astype('datetime64')
except (TypeError, ValueError) as err:
    print(f"{type(err).__name__}: {err}")

ValueError: The 'datetime64' dtype has no unit. Please pass in 'datetime64[ns]' instead.

# Index

    Use the index to easily access rows in a pandas Series or DataFrame

In [130]:
data = [58, 98, 78, 54, 69, 36, 9, 7, 897]
ds = pd.Series(data, name="Sales Data")
ds

0     58
1     98
2     78
3     54
4     69
5     36
6      9
7      7
8    897
Name: Sales Data, dtype: int64

In [131]:
#data access by index
ds[5]

36

In [132]:
ds[2:5]

2    78
3    54
4    69
Name: Sales Data, dtype: int64

# Custom indices

In [133]:
sales = [0, 5, 155, 0, 518]
items = ['coffee', 'bananas', 'tea', 'coconut', 'sugar']

sales_series = pd.Series(sales, index=items, name="Sales Record")
sales_series

coffee       0
bananas      5
tea        155
coconut      0
sugar      518
Name: Sales Record, dtype: int64

In [134]:
# Labels can also be (re)assigned after construction through the .index attribute.
# These are the same labels already passed as `items` when the Series was built,
# so the Series shown next is unchanged.
sales_series.index = ['coffee', 'bananas', 'tea', 'coconut', 'sugar']

In [135]:
sales_series

coffee       0
bananas      5
tea        155
coconut      0
sugar      518
Name: Sales Record, dtype: int64

In [136]:
sales_series['tea']

155

In [137]:
# Slicing by label includes the end label: 'sugar' is part of the result
sales_series['tea':'sugar']

tea        155
coconut      0
sugar      518
Name: Sales Record, dtype: int64

# .iloc[] method

Access the values by their positional index
    1. This method works even when the Series has a custom, non-integer index
    2. It is more efficient than slicing and is recommended by the pandas creators
    
            Syntax: df.iloc[row positions, column positions]
                0 - (single row)
                [5, 8] - (multiple rows)
                [0:11] - (range of rows)

In [138]:
sales_series

coffee       0
bananas      5
tea        155
coconut      0
sugar      518
Name: Sales Record, dtype: int64

In [139]:
sales_series.iloc[2]

155

In [140]:
# Positional slice: takes positions 2 and 3; the stop position 4 is excluded
sales_series.iloc[2: 4]

tea        155
coconut      0
Name: Sales Record, dtype: int64

# .loc[] method

        Access values by their custom labels
            
            
            Syntax: df.loc[row label, column label]

In [141]:
sales_series.loc['tea']

155

In [142]:
sales_series.loc['bananas': 'sugar']

bananas      5
tea        155
coconut      0
sugar      518
Name: Sales Record, dtype: int64

# Duplicate index  value

- It is possible to have duplicate index values in a pandas Series or DataFrame
            
                Accessing such a label with .loc[] returns all corresponding rows

In [143]:
# The label 'coffee' appears twice on purpose to demonstrate a duplicate index
sales = [0, 5, 155, 0, 518]
items = ['coffee', 'coffee', 'tea', 'coconut', 'sugar']

# NOTE: despite the name `df`, this object is a Series, not a DataFrame
df = pd.Series(sales, index=items, name='Sales')
df

coffee       0
coffee       5
tea        155
coconut      0
sugar      518
Name: Sales, dtype: int64

In [144]:
df.loc['coffee']

coffee    0
coffee    5
Name: Sales, dtype: int64

# Resetting the Index

We can reset the index of a Series or DataFrame back to the default range of integers by using the `.reset_index()` method

- By default, the existing index becomes a new column in the resulting DataFrame

In [145]:
df

coffee       0
coffee       5
tea        155
coconut      0
sugar      518
Name: Sales, dtype: int64

In [146]:
# Without drop=True the old labels are kept as a new 'index' column next to 'Sales'
df.reset_index()

Unnamed: 0,index,Sales
0,coffee,0
1,coffee,5
2,tea,155
3,coconut,0
4,sugar,518


In [147]:
# drop=True discards the old labels; the values are renumbered 0..4 and stay a Series
df.reset_index(drop=True)

0      0
1      5
2    155
3      0
4    518
Name: Sales, dtype: int64

# Pandas Series Example

In [148]:
#1.
import pandas as pd
import numpy as np

# Build a named Series straight from a plain Python list
x = [1, 6, 9, 10]
y = pd.Series(data=x, name="Simple Array")
print(y)

0     1
1     6
2     9
3    10
Name: Simple Array, dtype: int64


In [149]:
#2. Create a series from an array

#dataset
x = np.array(['Fruits', 'Vegetables', 'Meat', 'Fish', 'Eggs', 'Lantils'])

#create a data Series
ds = pd.Series(x, name='Commodites')
print(ds)

0        Fruits
1    Vegetables
2          Meat
3          Fish
4          Eggs
5       Lantils
Name: Commodites, dtype: object


In [150]:
#3. Create a series from list

# Named `parties` rather than `list` so the built-in `list` type is not
# shadowed for every later cell that runs in the same kernel.
parties = ['INC', 'BJP', 'AAP', 'SP', 'RLD', 'AIMIM']

series = pd.Series(parties, name="Polictal Parties")
print(series)

0      INC
1      BJP
2      AAP
3       SP
4      RLD
5    AIMIM
Name: Polictal Parties, dtype: object


In [151]:
#4. Access the first five elements of a Series

# The thirteen letters of "superdinerice", one per Series entry
x = list("superdinerice")

ds = pd.Series(x)

# Positional slice: the first 5 entries (positions 0 through 4)
print(ds.iloc[:5])

0    s
1    u
2    p
3    e
4    r
dtype: object


In [152]:
#5. Access a single element using its index label

x = list("superdinerice")
# Integer labels 6..18 replace the default 0-based index
ds = pd.Series(x, index=list(range(6, 6 + len(x))))
# .loc looks the value up by label, so 14 is a label here, not a position
print(ds.loc[14])

e


In [155]:
#6. Load the sample CSV into a DataFrame
# Raw string literal: the Windows path contains backslashes (\P, \D, \s) that
# are invalid escape sequences in a normal string and trigger Python warnings.
# The resulting path value is unchanged.
df = pd.read_csv(r'M:\Python\Pandas\DataSets\sample1.csv')
df

Unnamed: 0,Name
0,Mohit Sharma
1,Anil Kumar
2,Anjali Nanda
3,Swati Sharma
4,Rohit Yadav
5,Harender Bisht
6,Sachin Goel
7,Aman Chauhan


In [163]:
series = pd.Series(df['Name'])

In [164]:
series.head(5)

0    Mohit Sharma
1      Anil Kumar
2    Anjali Nanda
3    Swati Sharma
4     Rohit Yadav
Name: Name, dtype: object

In [166]:
series[2:5]

2    Anjali Nanda
3    Swati Sharma
4     Rohit Yadav
Name: Name, dtype: object

In [168]:
# Raw string literal: the Windows path contains backslashes (\P, \D, \s) that
# are invalid escape sequences in a normal string and trigger Python warnings.
# The resulting path value is unchanged.
df = pd.read_csv(r'M:\Python\Pandas\DataSets\sample1.csv')
# Show at most the first 10 rows
df.head(10)

Unnamed: 0,Name
0,Mohit Sharma
1,Anil Kumar
2,Anjali Nanda
3,Swati Sharma
4,Rohit Yadav
5,Harender Bisht
6,Sachin Goel
7,Aman Chauhan
8,Sanjeev Kapoor
9,Azim Khan


In [180]:
ser = pd.Series(df['Name'])

In [181]:
data = ser.head(10)

In [182]:
data

0      Mohit Sharma
1        Anil Kumar
2      Anjali Nanda
3      Swati Sharma
4       Rohit Yadav
5    Harender Bisht
6       Sachin Goel
7      Aman Chauhan
8    Sanjeev Kapoor
9         Azim Khan
Name: Name, dtype: object

In [183]:
# .loc slices by label and includes both ends: labels 3, 4, 5 and 6
data.loc[3:6]

3      Swati Sharma
4       Rohit Yadav
5    Harender Bisht
6       Sachin Goel
Name: Name, dtype: object

In [184]:
# .iloc slices by position and excludes the stop: positions 3, 4 and 5
data.iloc[3:6]

3      Swati Sharma
4       Rohit Yadav
5    Harender Bisht
Name: Name, dtype: object