In [2]:
import pandas as pd
import numpy as np

In [3]:
# Build a Series from a plain list; no index is given, so the labels are 0..4.
s = pd.Series(data=[1, 2, 3, 4, 5])
s

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [4]:
# Series with custom index values: string labels replace the default integers.
labels = list("abcde")
s = pd.Series([1, 2, 3, 4, 5], index=labels)
s

a    1
b    2
c    3
d    4
e    5
dtype: int64

In [None]:
# A dict maps straight onto a Series: keys become the index, values the data.
data = dict(a=1, b=2, c=3)
s = pd.Series(data)
s

a    1
b    2
c    3
dtype: int64

**Series = one value per index label**

In [16]:
# Each dict value is a whole list; the Series keeps every list as ONE element
# under its label, which is why the resulting dtype is `object`.
data = dict(
    a=[1, 2, 3],
    b=[4, 5, 6],
    c=[7, 8, 9],
)
s = pd.Series(data)
s

a    [1, 2, 3]
b    [4, 5, 6]
c    [7, 8, 9]
dtype: object

In [6]:
data = dict(a=1, b=2, c=3)
s = pd.Series(data)
# Show the four core attributes; neither the Series nor its index has a name yet.
print(f"{s.values} , {s.index} , {s.name} , {s.index.name}")

[1 2 3] , Index(['a', 'b', 'c'], dtype='object') , None , None


In [None]:
# Same data as before, but this time both the index and the Series carry a name.
letters = pd.Index(list("abc"), name="letters")
s = pd.Series([1, 2, 3], index=letters, name="counts")
print(f"{s.values} , {s.index} , {s.name} , {s.index.name}")

[1 2 3] , Index(['a', 'b', 'c'], dtype='object', name='letters') , counts , letters


In [13]:
# The index name can also be assigned after construction; it is metadata
# attached to the index, separate from the Series' own `name`.
data = dict(a=1, b=2, c=3)
s = pd.Series(data, name="series_data")

s.index.name = "index_name"
print(f"{s.index.name}")

index_name


In [14]:
# 2-D data: every dict key becomes a column and its list supplies the rows.
data = dict(
    state=list("abcd"),
    year=[1, 2, 3, 4],
    pop=[1.2, 1.3, 1.4, 1.5],
)

df = pd.DataFrame(data)
df

Unnamed: 0,state,year,pop
0,a,1,1.2
1,b,2,1.3
2,c,3,1.4
3,d,4,1.5


In [18]:
# How dict keys behave for a DataFrame: each key becomes a column.
# Every value here is a scalar, so a one-element index is passed to get a single row.
data = dict(a=1, b=2, c=3)
single_row_labels = [0]
s = pd.DataFrame(data, index=single_row_labels)
s

Unnamed: 0,a,b,c
0,1,2,3


In [19]:
# Same 2-D source data as before
data = dict(
    state=list("abcd"),
    year=[1, 2, 3, 4],
    pop=[1.2, 1.3, 1.4, 1.5],
)

# `columns=` selects which keys to keep: only the 'year' column ends up in the frame
wanted_columns = ['year']
df = pd.DataFrame(data, columns=wanted_columns)
df

Unnamed: 0,year
0,1
1,2
2,3
3,4


In [16]:
# Same 2-D source data as before
data = dict(
    state=list("abcd"),
    year=[1, 2, 3, 4],
    pop=[1.2, 1.3, 1.4, 1.5],
)

# `index=` replaces the default 0..n-1 row labels with custom ones
row_labels = ['one', 'two', 'three', 'four']
df = pd.DataFrame(data, index=row_labels)
df

Unnamed: 0,state,year,pop
one,a,1,1.2
two,b,2,1.3
three,c,3,1.4
four,d,4,1.5


In [20]:
# (rows, columns) of whichever `df` is currently bound in the kernel.
# NOTE(review): the recorded (4, 1) matches the single-column `columns=['year']`
# frame; on a top-to-bottom run `df` is the 4x3 frame built in the cell just
# above, so this would show (4, 3) -- confirm which frame was intended.
df.shape

(4, 1)

In [21]:
# INDEX IN PANDAS
# Three kinds of index. Each one gets its own name so that the label-based and
# datetime examples are not thrown away by the next assignment; `index` still
# ends up bound to the RangeIndex, which is what the cell displays.
label_index = pd.Index(['a', 'b', 'c', 'd'])
datetime_index = pd.DatetimeIndex(['2023-01-01', '2023-01-02'])
index = pd.RangeIndex(start=0, stop=10, step=2)

index

RangeIndex(start=0, stop=10, step=2)

In [22]:
# An Index is immutable: item assignment raises TypeError.
# The error IS the demonstration, so catch it and print it instead of letting
# it abort a top-to-bottom run of the notebook.
indx = pd.Index(['a', 'b'])
try:
    indx[0] = 'n'
except TypeError as err:
    print(f"TypeError: {err}")
indx  # unchanged: the assignment was rejected

TypeError: Index does not support mutable operations

In [23]:
# To "change" labels, build a fresh Index rather than editing one in place.
new_index = pd.Index(list("zyx"))
new_index

Index(['z', 'y', 'x'], dtype='object')

In [24]:
#  INDEX OPERATIONS
indx_1 = pd.Index(list("abcd"))
indx_2 = pd.Index(list("bcde"))

# Set-style operations on the labels of the two indexes
all_labels = indx_1.union(indx_2)              # labels found in either index
shared_labels = indx_1.intersection(indx_2)    # labels found in both
only_in_first = indx_1.difference(indx_2)      # labels in indx_1 but not indx_2

print(f"get unique value - {all_labels} , common value - {shared_labels} , value present in indx 1 but not in indx2 {only_in_first} ")

get unique value - Index(['a', 'b', 'c', 'd', 'e'], dtype='object') , common value - Index(['b', 'c', 'd'], dtype='object') , value present in indx 1 but not in indx2 Index(['a'], dtype='object') 


In [26]:
#  Index methods: a frame whose row labels contain a duplicate ('store_1' twice)
store_labels = ['store_1', 'store_2', 'store_1', 'store_3']
df = pd.DataFrame({'sales': [100, 200, 150, 300]}, index=store_labels)

# Keep a handle on the index so its methods can be explored separately
idx = df.index
idx

Index(['store_1', 'store_2', 'store_1', 'store_3'], dtype='object')

In [27]:
# Gather the four index checks first, then print them in one report.
unique_labels = idx.unique()
has_no_duplicates = idx.is_unique
is_sorted_ascending = idx.is_monotonic_increasing
membership_mask = idx.isin(['store_1', 'store_4'])

print(
    f"""
Unique index values (what labels exist):
{unique_labels}

Are all index values unique? (no duplicates):
{has_no_duplicates}

Is the index sorted in increasing order? (ordering check):
{is_sorted_ascending}

Which index values are in ['store_1', 'store_4']? (membership test):
{membership_mask}
"""
)




Unique index values (what labels exist):
Index(['store_1', 'store_2', 'store_3'], dtype='object')

Are all index values unique? (no duplicates):
False

Is the index sorted in increasing order? (ordering check):
False

Which index values are in ['store_1', 'store_4']? (membership test):
[ True False  True False]



In [28]:
# Dictionary of lists -> DataFrame: one column per key, one row per list position
data = dict(
    name=["Alice", "Bob", "Charlie"],
    age=[25, 30, 35],
    city=["New York", "Los Angeles", "Chicago"],
)

df = pd.DataFrame(data)

print(df)


      name  age         city
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago


In [29]:
# A single flat record (all scalar values)
data = dict(name="Alice", age=25, city="New York")

# Wrapping the dict in a list turns it into a one-row table
records = [data]
df = pd.DataFrame(records)
print(df)

    name  age      city
0  Alice   25  New York


In [50]:
# Nested dict: outer keys -> inner {field: value} dicts
data = dict(
    row1=dict(name="Alice", age=25),
    row2=dict(name="Bob", age=30),
)

# With the default orientation the OUTER keys ('row1', 'row2') become columns
# and the inner keys ('name', 'age') become the row labels.
df = pd.DataFrame.from_dict(data, orient="columns")
print(df)

       row1 row2
name  Alice  Bob
age      25   30


In [49]:
# Mixing a list with a scalar: the scalar is repeated for every row of the list
x = dict(
    name=["Alice", "Bob"],
    age=25,  # scalar
)
df = pd.DataFrame(x)
df

Unnamed: 0,name,age
0,Alice,25
1,Bob,25


In [None]:
#  DataFrame from a list of dicts: each dict is one row, its keys are the columns
fields = ('name', 'age', 'city')
rows = [
    ('Alice', 25, 'New York'),
    ('Bob', 30, 'Los Angeles'),
    ('Charlie', 35, 'Chicago'),
]
data = [dict(zip(fields, row)) for row in rows]
df = pd.DataFrame(data)
df

Unnamed: 0,name,age,city
0,Alice,25,New York
1,Bob,30,Los Angeles
2,Charlie,35,Chicago


In [53]:
#  DataFrame from a list of dicts where one record lacks a key:
#  the missing 'city' for Charlie is filled with NaN.
complete_records = [
    dict(name='Alice', age=25, city='New York'),
    dict(name='Bob', age=30, city='Los Angeles'),
]
record_without_city = dict(name='Charlie', age=35)

data = complete_records + [record_without_city]
df = pd.DataFrame(data)
df

Unnamed: 0,name,age,city
0,Alice,25,New York
1,Bob,30,Los Angeles
2,Charlie,35,


In [36]:
#  DataFrame from a NumPy array
# Seed the generator so the "random" values are identical on every run;
# an unseeded generator makes the displayed table impossible to reproduce.
seed = 42
rng = np.random.default_rng(seed)
data = rng.random((3, 4))  # 3 rows x 4 columns of floats in [0, 1)

#  Wrap the array in a DataFrame, labelling both axes explicitly
arr = pd.DataFrame(
    data,
    columns=['a', 'b', 'c', 'd'],
    index=['row1', 'row2', 'row3'],
)
arr

Unnamed: 0,a,b,c,d
row1,0.047378,0.257724,0.387042,0.493244
row2,0.369022,0.630596,0.837544,0.105134
row3,0.856617,0.571592,0.288651,0.260534


**NumPy has structured arrays, which can store a different dtype in each named field.**

In [37]:
# NumPy is already imported as `np` in the notebook's import cell, so it is
# not re-imported here.

# Define a structured dtype: one (field name, type code) pair per field
dtype = [
    ('name', 'U10'),
    ('age', 'i4'),
    ('weight', 'f4'),
]

# Create a structured array; each tuple is one record laid out per `dtype`
s_arr = np.array(
    [
        ('Alice', 25, 55.5),
        ('Bob', 30, 75.0),
        ('Charlie', 22, 68.2),
    ],
    dtype=dtype,
)

print(s_arr)


[('Alice', 25, 55.5) ('Bob', 30, 75. ) ('Charlie', 22, 68.2)]


In [68]:
#  A structured array converts directly to a DataFrame:
#  the field names of `s_arr` become the column names.
data = pd.DataFrame(data=s_arr)
data

Unnamed: 0,name,age,weight
0,Alice,25,55.5
1,Bob,30,75.0
2,Charlie,22,68.199997


In [74]:
divider = "x-------x"

#  1) completely empty DataFrame: no columns, no rows
print(divider)
df = pd.DataFrame()
print(df)

#  2) empty DataFrame with the columns predefined, still no rows
print(divider)
df = pd.DataFrame(columns=['name', 'age', 'city'])
print(df)

#  3) columns and row labels both defined; every cell is NaN
print(divider)
df = pd.DataFrame(columns=['name', 'age'], index=[0, 1, 2])
print(df)
print(divider)


x-------x
Empty DataFrame
Columns: []
Index: []
x-------x
Empty DataFrame
Columns: [name, age, city]
Index: []
x-------x
  name  age
0  NaN  NaN
1  NaN  NaN
2  NaN  NaN
x-------x


In [None]:
#  Alignment
#  Arithmetic pairs values by LABEL, not by position; a label present in only
#  one operand ('a' and 'f' here) yields NaN in the result.
s1 = pd.Series([1, 2, 3, 4, 5], index=list("abcde"))
s2 = pd.Series([2, 3, 4, 5, 6], index=list("bcdef"))
s1.add(s2)

a     NaN
b     4.0
c     6.0
d     8.0
e    10.0
f     NaN
dtype: float64

In [15]:
# Two frames that overlap only partially: they share column 'B' and row 'y'.
df1 = pd.DataFrame(dict(A=[1, 2], B=[3, 4]), index=['x', 'y'])
df2 = pd.DataFrame(dict(B=[5, 6], C=[7, 8]), index=['y', 'z'])

for frame in (df1, df2):
    print(f"{frame}")

   A  B
x  1  3
y  2  4
   B  C
y  5  7
z  6  8


In [21]:
# Two frames with identical columns and row labels: every cell finds a
# partner, so the sum contains no NaN.
column_values = {'A': [1, 2, 3], 'B': [3, 4, 5]}
row_labels = ['x', 'y', 'z']

df1 = pd.DataFrame(column_values, index=row_labels)
df2 = pd.DataFrame(column_values, index=row_labels)

df1 + df2

Unnamed: 0,A,B
x,2,6
y,4,8
z,6,10


In [35]:
# Nested dict keyed by row label; each inner dict holds that row's fields
data = dict(
    row_1=dict(name="Alice", age=25, city="Delhi"),
    row_2=dict(name="Bob", age=30, city="Mumbai"),
    row_3=dict(name="Charlie", age=28, city="Bangalore"),
)

# orient='index' makes the outer keys the row labels and the inner keys the columns
df = pd.DataFrame.from_dict(data, orient='index')
df

Unnamed: 0,name,age,city
row_1,Alice,25,Delhi
row_2,Bob,30,Mumbai
row_3,Charlie,28,Bangalore
