In [175]:
# Reindexing 
# Reindexing script for the OpenStreetMap database

In [176]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

obj = pd.Series([4.5,7.2,-5.3,3.6], index=['a','b','c','d'])
obj 



a    4.5
b    7.2
c   -5.3
d    3.6
dtype: float64

In [177]:
# Reindexing the Series
obj2 = obj.reindex(['a','b','c','d','e'])
obj2
# Reindexing with fill value


a    4.5
b    7.2
c   -5.3
d    3.6
e    NaN
dtype: float64

In [178]:
obj3 = pd.Series(["blue", "purple", "yellow"], 
                 index=[0, 2, 4]) 
obj3
# Reindexing with fill value

0      blue
2    purple
4    yellow
dtype: object

In [179]:
obj3.reindex(range(6), method="ffill", fill_value="red")
# Reindexing with method


0      blue
1      blue
2    purple
3    purple
4    yellow
5    yellow
dtype: object

In [180]:
obj3.reindex(range(6), method="bfill", fill_value="red")
# Reindexing with method


0      blue
1    purple
2    purple
3    yellow
4    yellow
5       red
dtype: object

In [181]:
obj3.reindex(range(6), method="nearest", fill_value="red")
# Reindexing with method


0      blue
1    purple
2    purple
3    yellow
4    yellow
5    yellow
dtype: object

In [182]:
obj3.reindex(range(6), method="pad", fill_value="red")
# Reindexing with method


0      blue
1      blue
2    purple
3    purple
4    yellow
5    yellow
dtype: object

In [183]:
obj3.reindex(range(6), method="backfill", fill_value="red")
# Reindexing with method


0      blue
1    purple
2    purple
3    yellow
4    yellow
5       red
dtype: object

In [184]:
frame = pd.DataFrame(np.arange(9).reshape((3, 3)), 
                     index=['a', 'c', 'd'],
                     columns=['Inazuma', 'Mondstadt', 'Sumeru'])
frame
# Reindexing the DataFrame


Unnamed: 0,Inazuma,Mondstadt,Sumeru
a,0,1,2
c,3,4,5
d,6,7,8


In [185]:
frame2 = frame.reindex(['a', 'b', 'c', 'd'])
frame2
# Reindexing the DataFrame with fill value, method, and limit

Unnamed: 0,Inazuma,Mondstadt,Sumeru
a,0.0,1.0,2.0
b,,,
c,3.0,4.0,5.0
d,6.0,7.0,8.0


In [186]:
frame2 = frame.reindex(['a', 'b', 'c', 'd'])
frame2
# Reindexing the DataFrame with fill value, method, and limit

Unnamed: 0,Inazuma,Mondstadt,Sumeru
a,0.0,1.0,2.0
b,,,
c,3.0,4.0,5.0
d,6.0,7.0,8.0


In [187]:
frame.index.name = 'baris'  # Set the index name of the DataFrame
frame.columns.name = 'state'  # Set the column name of the DataFrame
print(frame)  # Display the DataFrame with index and column names
# year  state  pop  debt

#frame.reindex(state, axis="columns")


state  Inazuma  Mondstadt  Sumeru
baris                            
a            0          1       2
c            3          4       5
d            6          7       8


In [188]:
state = ['Inazuma','Liyue', 'Mondstadt', 'Sumeru']
# Reindexing the DataFrame with fill value, method, and limit
frame.reindex(state, axis="columns", fill_value=0, method="ffill", limit=1)
# Reindexing the DataFrame with fill value, method, and limit

state,Inazuma,Liyue,Mondstadt,Sumeru
baris,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
a,0,0,1,2
c,3,3,4,5
d,6,6,7,8


In [189]:
frame.reindex(state, axis="columns")


state,Inazuma,Liyue,Mondstadt,Sumeru
baris,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
a,0,,1,2
c,3,,4,5
d,6,,7,8


In [190]:
frame.loc[['a', 'c', 'd'], ['Inazuma', 'Mondstadt']]  

# Reindexing the DataFrame with fill value, method, and limit

state,Inazuma,Mondstadt
baris,Unnamed: 1_level_1,Unnamed: 2_level_1
a,0,1
c,3,4
d,6,7


In [191]:
obj = pd.Series(np.arange(5), index=['a', 'b', 'c', 'd', 'e'])
obj
# Reindexing the Series with fill value, method, and limit

a    0
b    1
c    2
d    3
e    4
dtype: int64

In [192]:
new_obj = obj.drop(['c', 'd'])
new_obj
# Reindexing the Series with fill value, method, and limit

a    0
b    1
e    4
dtype: int64

In [193]:
data = pd.DataFrame(np.arange(16).reshape((4, 4)),
                     index=['Inazuma', 'Liyue', 'Mondstadt', 'Sumeru'],
                     columns=['population', 'hero', 'houses', 'S class'])
data
# Reindexing the DataFrame with fill value, method, and limit

Unnamed: 0,population,hero,houses,S class
Inazuma,0,1,2,3
Liyue,4,5,6,7
Mondstadt,8,9,10,11
Sumeru,12,13,14,15


In [194]:
data.drop(index=['Inazuma', 'Liyue'])

# Reindexing the DataFrame with fill value, method, and limit

Unnamed: 0,population,hero,houses,S class
Mondstadt,8,9,10,11
Sumeru,12,13,14,15


In [195]:
data.drop(columns=['hero']) 


Unnamed: 0,population,houses,S class
Inazuma,0,2,3
Liyue,4,6,7
Mondstadt,8,10,11
Sumeru,12,14,15


In [196]:
data.drop('hero', axis=1)  # Drop the 'hero' column from the DataFrame


Unnamed: 0,population,houses,S class
Inazuma,0,2,3
Liyue,4,6,7
Mondstadt,8,10,11
Sumeru,12,14,15


In [197]:
data.drop(['hero', 'S class'], axis='columns') 

# Drop the 'hero' and 'S class' columns from the DataFrame

Unnamed: 0,population,houses
Inazuma,0,2
Liyue,4,6
Mondstadt,8,10
Sumeru,12,14


In [198]:
# Lets Compare the two DataFrames

data1 = pd.DataFrame(np.arange(16).reshape((4, 4)),
                     index=['Inazuma', 'Liyue', 'Mondstadt', 'Sumeru'],
                     columns=['population', 'hero', 'houses', 'S class'])
data1

Unnamed: 0,population,hero,houses,S class
Inazuma,0,1,2,3
Liyue,4,5,6,7
Mondstadt,8,9,10,11
Sumeru,12,13,14,15


In [199]:
data1.drop(['Inazuma', 'Liyue'], inplace=True)  
# Drop the 'Inazuma' and 'Liyue' rows from the DataFrame in place
data1

Unnamed: 0,population,hero,houses,S class
Mondstadt,8,9,10,11
Sumeru,12,13,14,15


In [200]:
data1.drop(['hero'], axis='columns', inplace=True)
# Drop the 'hero' column from the DataFrame in place
data1


Unnamed: 0,population,houses,S class
Mondstadt,8,10,11
Sumeru,12,14,15


In [201]:
data1.drop(['hero', 'S class'], axis='columns', inplace=True)
# Drop the 'hero' and 'S class' columns from the DataFrame in place
data1
# Reindexing the DataFrame with fill value, method, and limit

KeyError: "['hero'] not found in axis"

In [None]:
data1.drop(['S class'], axis='columns', inplace=True)
# Drop the 'S class' column from the DataFrame in place
data1


Unnamed: 0,population,houses
Mondstadt,8,10
Sumeru,12,14


In [None]:
# Indexing, Selection, and Filtering
# Indexing, selection, and filtering are essential operations in data manipulation and analysis.
# They allow you to access and modify specific elements or subsets of data within a DataFrame or Series.
# In this section, we will explore various techniques for indexing, selecting, and filtering data in pandas.
# We will cover the following topics:
# 1. Indexing with labels and positions
# 2. Boolean indexing
# 3. Conditional filtering
# 4. Using the `query()` method
# 5. Using the `loc[]` and `iloc[]` accessors
# 6. Using the `at[]` and `iat[]` accessors
# 7. Using the `xs()` method
# 8. Using the `filter()` method
# 9. Using the `isin()` method
# 10. Using the `between()` method
# 11. Using the `nlargest()` and `nsmallest()` methods
# 12. Using the `sample()` method
# 13. Using the `set_index()` method
# 14. Using the `reset_index()` method
# 15. Using the `sort_index()` method
# 16. Using the `sort_values()` method
# 17. Using the `groupby()` method
# 18. Using the `pivot_table()` method
# 19. Using the `melt()` method
# 20. Using the `stack()` and `unstack()` methods
# 21. Using the `pivot()` method
# 22. Using the `crosstab()` method
# 23. Using the `merge()` method
# 24. Using the `join()` method
# 25. Using the `concat()` method
# 26. Using the `append()` method
# 27. Using the `combine_first()` method
# 28. Using the `combine()` method
# 29. Using the `update()` method
# 30. Using the `replace()` method
# 31. Using the `fillna()` method
# 32. Using the `dropna()` method
# 33. Using the `drop_duplicates()` method
# 34. Using the `duplicated()` method
# 35. Using the `value_counts()` method
# 36. Using the `unique()` method
# 37. Using the `nunique()` method
# 38. Using the `count()` method
# 39. Using the `sum()` method
# 40. Using the `mean()` method
# 41. Using the `median()` method
# 42. Using the `std()` method
# 43. Using the `var()` method
# 44. Using the `min()` method
# 45. Using the `max()` method
# 46. Using the `quantile()` method
# 47. Using the `describe()` method
# 48. Using the `info()` method
# 49. Using the `head()` method
# 50. Using the `tail()` method
# 51. Using the `sample()` method
# 52. Using the `to_csv()` method
# 53. Using the `to_excel()` method
# 54. Using the `to_json()` method
# 55. Using the `to_html()` method
# 56. Using the `to_sql()` method
# 57. Using the `to_pickle()` method
# 58. Using the `to_dict()` method
# 59. Using the `to_records()` method
# 60. Using the `to_numpy()` method
# 61. Using the `to_list()` method
# 62. Using the `to_string()` method
# 63. Using the `to_latex()` method
# 64. Using the `to_markdown()` method
# 65. Using the `to_clipboard()` method
# 66. Using the `to_parquet()` method
# 67. Using the `to_feather()` method
# 68. Using the `to_msgpack()` method
# 69. Using the `to_stata()` method
# 70. Using the `to_sas()` method
# 71. Using the `to_orc()` method
# 72. Using the `to_sqlalchemy()` method
# 73. Using the `to_sqlite()` method
# 74. Using the `to_sqlserver()` method
# 75. Using the `to_mssql()` method
# 76. Using the `to_mysql()` method
# 77. Using the `to_postgresql()` method
# 78. Using the `to_redshift()` method
# 79. Using the `to_teradata()` method
# 80. Using the `to_hive()` method
# 81. Using the `to_impala()` method
# 82. Using the `to_spark()` method
# 83. Using the `to_dask()` method
# 84. Using the `to_modin()` method
# 85. Using the `to_ray()` method
# 86. Using the `to_pyspark()` method
# 87. Using the `to_dask_dataframe()` method
# 88. Using the `to_modin_dataframe()` method
# 89. Using the `to_ray_dataframe()` method
# 90. Using the `to_pyspark_dataframe()` method
# 91. Using the `to_dask_array()` method
# 92. Using the `to_modin_array()` method
# 93. Using the `to_ray_array()` method
# 94. Using the `to_pyspark_array()` method
# 95. Using the `to_dask_series()` method
# 96. Using the `to_modin_series()` method
# 97. Using the `to_ray_series()` method
# 98. Using the `to_pyspark_series()` method
# 99. Using the `to_dask_dataframe()` method
# 100. Using the `to_modin_dataframe()` method
# 101. Using the `to_ray_dataframe()` method
# 102. Using the `to_pyspark_dataframe()` method
# 103. Using the `to_dask_array()` method
# 104. Using the `to_modin_array()` method
# 105. Using the `to_ray_array()` method
# 106. Using the `to_pyspark_array()` method
# 107. Using the `to_dask_series()` method
# 108. Using the `to_modin_series()` method
# 109. Using the `to_ray_series()` method
# 110. Using the `to_pyspark_series()` method
# 111. Using the `to_dask_dataframe()` method
# 112. Using the `to_modin_dataframe()` method


Unnamed: 0,population,houses
Mondstadt,8,10
Sumeru,12,14


In [None]:
obj = pd.Series(np.arange(4.), index=['a', 'b', 'c', 'd'])
obj
# Reindexing the Series with fill value, method, and limit


a    0.0
b    1.0
c    2.0
d    3.0
dtype: float64

In [None]:
obj['b']
# Accessing the value at index 'b' in the Series



np.float64(1.0)

In [None]:
obj[1]

  obj[1]


np.float64(1.0)

In [None]:
obj[2:4]

c    2.0
d    3.0
dtype: float64

In [None]:
obj[['b', 'a', 'd']]  
# Accessing multiple values in the Series using a list of indices


b    1.0
a    0.0
d    3.0
dtype: float64

In [None]:
obj[[1, 3]]
# Accessing multiple values in the Series using a list of integer positions

  obj[[1, 3]]


b    1.0
d    3.0
dtype: float64

In [None]:
obj = pd.Series(np.arange(4.), index=['a', 'b', 'c', 'd'])
obj[obj < 2.0]
# Accessing values in the Series that are less than 2

a    0.0
b    1.0
dtype: float64

In [None]:
obj = pd.Series(np.arange(4.), index=['a', 'b', 'c', 'd']) 
# Accessing the Series with a boolean mask top gain float values
obj['b':'c']

b    1.0
c    2.0
dtype: float64

In [None]:
obj = pd.Series(np.arange(4.), index=['a', 'b', 'c', 'd'])
# Accessing the Series with a boolean mask to gain float values
obj.loc[['b', 'a', 'd']]
# Accessing multiple values in the Series using a list of indices

b    1.0
a    0.0
d    3.0
dtype: float64

In [None]:
obj4 = pd.Series([1,2,3], index=['2','0','1'])
obj5 = pd.Series([1,2,3], index=['a','b','c'])

obj4
# Accessing the Series with a boolean mask to gain float values

2    1
0    2
1    3
dtype: int64

In [None]:
obj5
 

a    1
b    2
c    3
dtype: int64

In [None]:
obj4[[0,1,2]]

# Accessing multiple values in the 
# Series using a list of integer positions
# Sorting the Series by index
 

  obj4[[0,1,2]]


2    1
0    2
1    3
dtype: int64

In [None]:
obj5[['a','b','c']]
# Accessing multiple values in the Series using a list of integer positions

a    1
b    2
c    3
dtype: int64

In [None]:
obj5  

a    1
b    2
c    3
dtype: int64

In [None]:
obj5[[0,1,2]]

  obj5[[0,1,2]]


a    1
b    2
c    3
dtype: int64

In [None]:
obj5.loc[['a','b','c']]
# Accessing multiple values in the Series using a list of indices

a    1
b    2
c    3
dtype: int64

In [None]:
obj5.loc[[0,1]]
# demonstrating the use of loc to access multiple values 
# in the Series using a list of indices
# showing the error when trying to access non-existent indices

KeyError: "None of [Index([0, 1], dtype='int64')] are in the [index]"

In [None]:
obj5.iloc[[0,1,2]]
# Accessing multiple values in the Series using 
# a list of integer positions

a    1
b    2
c    3
dtype: int64

In [None]:
obj4.iloc[[0,1,2]]


2    1
0    2
1    3
dtype: int64

In [None]:
obj5.loc["b":"c"]
# Accessing a range of values in the Series 
# using label-based indexing

b    2
c    3
dtype: int64

In [None]:
data = pd.DataFrame(np.arange(16).reshape((4, 4)),
                     index=['Inazuma', 'Liyue', 'Mondstadt', 'Sumeru'],
                     columns=['population', 'hero', 'houses', 'S class']
                     )
data
 

Unnamed: 0,population,hero,houses,S class
Inazuma,0,1,2,3
Liyue,4,5,6,7
Mondstadt,8,9,10,11
Sumeru,12,13,14,15


In [None]:
data['hero']

Inazuma       1
Liyue         5
Mondstadt     9
Sumeru       13
Name: hero, dtype: int64

In [None]:
data[['houses','population']]
# Accessing multiple columns in the DataFrame 
# using a list of column names

Unnamed: 0,houses,population
Inazuma,2,0
Liyue,6,4
Mondstadt,10,8
Sumeru,14,12


In [None]:
data[:2]
# Accessing the first two rows of the DataFrame

Unnamed: 0,population,hero,houses,S class
Inazuma,0,1,2,3
Liyue,4,5,6,7


In [None]:
data[data['houses'] > 5]

Unnamed: 0,population,hero,houses,S class
Liyue,4,5,6,7
Mondstadt,8,9,10,11
Sumeru,12,13,14,15


In [None]:
data < 5
# Accessing Boolean values in the DataFrame that are less than 5

Unnamed: 0,population,hero,houses,S class
Inazuma,True,True,True,True
Liyue,True,False,False,False
Mondstadt,False,False,False,False
Sumeru,False,False,False,False


In [None]:
data[data < 5] = 0
# Setting values in the DataFrame that are less than 5 to 0
data

Unnamed: 0,population,hero,houses,S class
Inazuma,0,0,0,0
Liyue,0,5,6,7
Mondstadt,8,9,10,11
Sumeru,12,13,14,15


In [None]:
# Selection on DataFrame with loc and iloc
data.loc['Liyue']
# Accessing the row with index 'Liyue' in the DataFrame

population    0
hero          5
houses        6
S class       7
Name: Liyue, dtype: int64

In [None]:
# Selection on DataFrame with loc and iloc
data.loc['Liyue']
# Accessing the row with index 'Liyue' in the DataFrame

population    0
hero          5
houses        6
S class       7
Name: Liyue, dtype: int64

In [None]:
data.loc[['Liyue', 'Sumeru'], ['population', 'hero']]

Unnamed: 0,population,hero
Liyue,0,5
Sumeru,12,13


In [None]:
data.loc[['Liyue', 'Sumeru']]
# Accessing the rows with indices 
# 'Liyue' and 'Sumeru' in the DataFrame

Unnamed: 0,population,hero,houses,S class
Liyue,0,5,6,7
Sumeru,12,13,14,15


In [None]:
data.loc['Liyue',['hero','houses']]
# Accessing the row with index 'Liyue' and columns 
# 'hero' and 'houses' in the DataFrame

hero      5
houses    6
Name: Liyue, dtype: int64

In [None]:
data.iloc[2]
# Accessing the row at index 2 in the DataFrame

population     8
hero           9
houses        10
S class       11
Name: Mondstadt, dtype: int64

In [None]:
data.iloc[[2,1]]
# Accessing the rows at indices 2 and 1 in the DataFrame
# using number of array

Unnamed: 0,population,hero,houses,S class
Mondstadt,8,9,10,11
Liyue,0,5,6,7


In [None]:
data.iloc[2, [3,0,1]]
# Accessing the row at index 2 and 
# columns at indices 3, 0, and 1 in the DataFrame

S class       11
population     8
hero           9
Name: Mondstadt, dtype: int64

In [None]:
data.iloc[[1, 2], [3,0, 1]]
# Accessing the rows at indices 1 and 2 and
# columns at indices 3, 0, and 1 in the DataFrame

Unnamed: 0,S class,population,hero
Liyue,7,0,5
Mondstadt,11,8,9


In [None]:
data.loc[:"Mondstadt", "hero"]
# Accessing the rows up to 'Mondstadt' and 
# the 'hero' column in the DataFrame


Inazuma      0
Liyue        5
Mondstadt    9
Name: hero, dtype: int64

In [None]:
data.iloc[:, :3][data.houses > 5]
# Accessing all rows and the first three columns
# of the DataFrame where the 'houses' column is greater than 5

Unnamed: 0,population,hero,houses
Liyue,0,5,6
Mondstadt,8,9,10
Sumeru,12,13,14


In [None]:
data.loc[data.houses >= 2]
# Accessing the rows where 
# the 'houses' column is greater than or equal to 2

Unnamed: 0,population,hero,houses,S class
Liyue,0,5,6,7
Mondstadt,8,9,10,11
Sumeru,12,13,14,15


In [None]:
# Integer Indexing pitfalls

ser = pd.Series(np.arange(3.0))
ser
# Accessing the Series with integer indexingS

0    0.0
1    1.0
2    2.0
dtype: float64

In [None]:
ser[-1]
# Accessing the last element of the Series using 
# negative indexing

KeyError: -1

In [None]:
ser


0    0.0
1    1.0
2    2.0
dtype: float64

In [None]:
ser2 = pd.Series(np.arange(3.0), index=['a', 'b', 'c'])
ser2    
# Accessing the Series with a custom index


a    0.0
b    1.0
c    2.0
dtype: float64

In [None]:
ser2[-1]
# Accessing the last element of the Series using negative indexing
# This will raise an error because the index is not an integer


  ser2[-1]


np.float64(2.0)

In [202]:
ser.iloc[-1]
# Accessing the last element of the Series using iloc
# This will work correctly because iloc uses integer-based indexing


np.float64(2.0)

In [203]:
ser[:2]
# Accessing the first two elements of the Series using slicing
# This will work correctly because slicing is supported in pandas


0    0.0
1    1.0
dtype: float64

In [204]:
# Pitfall of using integer indexing with custom index
# Pitfaal with chained indexing

data.loc[:, 'population'] = 1
# Setting the 'population' column to 1 for all rows
data

Unnamed: 0,population,hero,houses,S class
Inazuma,1,1,2,3
Liyue,1,5,6,7
Mondstadt,1,9,10,11
Sumeru,1,13,14,15


In [None]:
# Pitfall of using integer indexing with custom index
# Pitfaal with chained indexing

data.loc[:, 'population'] = 1
# Setting the 'population' column to 1 for all rows
data

Unnamed: 0,population,hero,houses,S class
Inazuma,1,1,2,3
Liyue,1,5,6,7
Mondstadt,1,9,10,11
Sumeru,1,13,14,15


In [None]:
data.iloc[2] = 5
# Setting the row at index 2 to 5
data
# Accessing the DataFrame with integer indexing

Unnamed: 0,population,hero,houses,S class
Inazuma,1,1,2,3
Liyue,1,5,6,7
Mondstadt,5,5,5,5
Sumeru,1,13,14,15


In [210]:
data.loc[data["houses"] > 5] = 3
# Setting the rows where 'houses' is greater than 5 to 3
data
# Accessing the DataFrame with chained indexing

Unnamed: 0,population,hero,houses,S class
Inazuma,1,1,2,3
Liyue,3,3,3,3
Mondstadt,5,5,5,5
Sumeru,3,3,3,3


In [212]:
data.loc[data.houses == 5]["houses"] = 6
# Setting the rows where 'houses' is equal to 5 to 6
# Accessing the DataFrame with chained indexing

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.loc[data.houses == 5]["houses"] = 6


In [None]:

# Accessing the DataFrame with chained indexing
data.loc[data.houses == 5, "houses"] = 6   
data 


Unnamed: 0,population,hero,houses,S class
Inazuma,1,1,2,3
Liyue,3,3,3,3
Mondstadt,5,5,6,5
Sumeru,3,3,3,3


: 