# Series

In [1]:
import pandas as pd

In [2]:
# Creating a Series from a list: with no index given, the rows are labelled 0..4
s1 = pd.Series(data=[10, 20, 30, 40, 50])
print("Series from list:\n", end=" ")
print(s1)

Series from list:
 0    10
1    20
2    30
3    40
4    50
dtype: int64


In [3]:
# Creating a Series with custom index: the string labels 'a'..'e' replace the default integers
labels = list('abcde')
s2 = pd.Series([10, 20, 30, 40, 50], index=labels)
print("\nSeries with custom index:\n", end=" ")
print(s2)




Series with custom index:
 a    10
b    20
c    30
d    40
e    50
dtype: int64


In [4]:
# Creating Series from dictionary: the keys become the index labels, the values the data
data = dict(a=100, b=200, c=300)
s3 = pd.Series(data)
print("\nSeries from dictionary:\n", end=" ")
print(s3)


Series from dictionary:
 a    100
b    200
c    300
dtype: int64


In [5]:
# Accessing elements: s1 carries the default integer labels, s2 the custom string labels
third_value = s1[2]
value_at_c = s2['c']
print("\nElement at index 2:", third_value)
print("Element at label 'c':", value_at_c)


Element at index 2: 30
Element at label 'c': 30


In [6]:
# Series operations: arithmetic is applied element by element.
# Adding s1 to itself and multiplying it by 2 give the same values.
summed = s1.add(s1)
scaled = s1.mul(2)
print("\nSeries addition:\n", summed)
print("\nSeries multiplication:\n", scaled)


Series addition:
 0     20
1     40
2     60
3     80
4    100
dtype: int64

Series multiplication:
 0     20
1     40
2     60
3     80
4    100
dtype: int64


In [7]:
# Boolean indexing: build a True/False mask first, then select with it
above_25 = s1 > 25
print("\nValues greater than 25:\n", s1[above_25])


Values greater than 25:
 2    30
3    40
4    50
dtype: int64


# DataFrame

In [8]:
# Creating DataFrame from dictionary: each key names a column, each list holds that column's values
names = ['Alice', 'Bob', 'Charlie', 'David']
ages = [24, 27, 22, 32]
salaries = [50000, 60000, 55000, 65000]
data = {'Name': names, 'Age': ages, 'Salary': salaries}
df = pd.DataFrame(data)
print("DataFrame:\n", end=" ")
print(df)

DataFrame:
       Name  Age  Salary
0    Alice   24   50000
1      Bob   27   60000
2  Charlie   22   55000
3    David   32   65000


In [9]:
# Creating DataFrame from list of lists: every inner list is one row,
# so the column names are supplied separately
rows = [[1, 'A'], [2, 'B'], [3, 'C']]
df2 = pd.DataFrame(rows, columns=['ID', 'Category'])
print("\nDataFrame from list of lists:\n", end=" ")
print(df2)


DataFrame from list of lists:
    ID Category
0   1        A
1   2        B
2   3        C


In [10]:
# Viewing DataFrame: first rows, summary statistics of the numeric columns, and structure
print("\nFirst two rows:\n", df.head(2))
print("\nSummary statistics:\n", df.describe())
print("\nInfo:\n")
# DataFrame.info() writes its report to stdout itself and returns None,
# so wrapping it in print() appends a stray "None" line after the report.
df.info()


First two rows:
     Name  Age  Salary
0  Alice   24   50000
1    Bob   27   60000

Summary statistics:
              Age        Salary
count   4.000000      4.000000
mean   26.250000  57500.000000
std     4.349329   6454.972244
min    22.000000  50000.000000
25%    23.500000  53750.000000
50%    25.500000  57500.000000
75%    28.250000  61250.000000
max    32.000000  65000.000000

Info:

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    4 non-null      object
 1   Age     4 non-null      int64 
 2   Salary  4 non-null      int64 
dtypes: int64(2), object(1)
memory usage: 228.0+ bytes
None


In [11]:
# Accessing columns: selecting a single column by name gives back a Series
age_column = df['Age']
print("\nAccess Age column:\n", age_column)


Access Age column:
 0    24
1    27
2    22
3    32
Name: Age, dtype: int64


In [12]:
# Accessing rows by index
row_by_label = df.loc[2]       # .loc selects by index label
row_by_position = df.iloc[1]   # .iloc selects by integer position
print("\nAccess row by loc:\n", row_by_label)
print("\nAccess row by iloc:\n", row_by_position)


Access row by loc:
 Name      Charlie
Age            22
Salary      55000
Name: 2, dtype: object

Access row by iloc:
 Name        Bob
Age          27
Salary    60000
Name: 1, dtype: object


In [13]:
# Filtering: keep only the rows whose Age is above 25
is_over_25 = df['Age'] > 25
print("\nPeople with Age > 25:\n", df[is_over_25])


People with Age > 25:
     Name  Age  Salary
1    Bob   27   60000
3  David   32   65000


In [14]:
# Adding a new column: Bonus is 10% of each Salary, stored on df itself
bonus = df['Salary'].mul(0.1)
df['Bonus'] = bonus
print("\nDataFrame with Bonus column:\n", end=" ")
print(df)


DataFrame with Bonus column:
       Name  Age  Salary   Bonus
0    Alice   24   50000  5000.0
1      Bob   27   60000  6000.0
2  Charlie   22   55000  5500.0
3    David   32   65000  6500.0


In [15]:
# Dropping a column: the result of drop() is assigned back to df
df = df.drop(columns='Bonus')
print("\nAfter dropping Bonus column:\n", end=" ")
print(df)


After dropping Bonus column:
       Name  Age  Salary
0    Alice   24   50000
1      Bob   27   60000
2  Charlie   22   55000
3    David   32   65000


In [16]:
# Sorting: highest Salary first
by_salary_desc = df.sort_values(by='Salary', ascending=False)
print("\nSorted by Salary:\n", by_salary_desc)


Sorted by Salary:
       Name  Age  Salary
3    David   32   65000
1      Bob   27   60000
2  Charlie   22   55000
0    Alice   24   50000


In [17]:
# Grouping: mean Salary for each distinct Age.
# Every Age in df is unique, so each group holds exactly one row.
mean_salary_by_age = df.groupby('Age')['Salary'].mean()
print("\nGroup by Age:\n", mean_salary_by_age)


Group by Age:
 Age
22    55000.0
24    50000.0
27    60000.0
32    65000.0
Name: Salary, dtype: float64


In [18]:
# Handling Missing Values: the None entries below are shown as NaN in the resulting frame
ages_with_gap = [24, None, 22, 32]
salaries_with_gap = [50000, 60000, None, 65000]
data2 = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David'],
    'Age': ages_with_gap,
    'Salary': salaries_with_gap,
}
df2 = pd.DataFrame(data2)
print("\nDataFrame with NaN values:\n", end=" ")
print(df2)


DataFrame with NaN values:
       Name   Age   Salary
0    Alice  24.0  50000.0
1      Bob   NaN  60000.0
2  Charlie  22.0      NaN
3    David  32.0  65000.0


In [19]:
# dropna() leaves only the rows that have a value in every column
complete_rows = df2.dropna()
print("\nDrop rows with NaN:\n", complete_rows)


Drop rows with NaN:
     Name   Age   Salary
0  Alice  24.0  50000.0
3  David  32.0  65000.0


In [20]:
# fillna(0) puts 0 in place of every missing entry
filled = df2.fillna(value=0)
print("\nFill NaN with default value:\n", filled)


Fill NaN with default value:
       Name   Age   Salary
0    Alice  24.0  50000.0
1      Bob   0.0  60000.0
2  Charlie  22.0      0.0
3    David  32.0  65000.0


In [21]:
# Merging two DataFrames: rows from both frames are paired up on the shared ID column
df_a = pd.DataFrame({
    'ID': [1, 2, 3],
    'Name': ['Alice', 'Bob', 'Charlie'],
})
df_b = pd.DataFrame({
    'ID': [1, 2, 3],
    'Score': [85, 90, 95],
})
merged = df_a.merge(df_b, on='ID')
print("\nMerged DataFrame:\n", end=" ")
print(merged)


Merged DataFrame:
    ID     Name  Score
0   1    Alice     85
1   2      Bob     90
2   3  Charlie     95


In [22]:
# Concatenation: the second frame is stacked underneath the first.
# Each frame keeps its own row labels, so 0 and 1 each appear twice in the result.
df_c1 = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
df_c2 = pd.DataFrame({'A': [5, 6], 'B': [7, 8]})
frames = [df_c1, df_c2]
concat = pd.concat(frames)
print("\nConcatenated DataFrame:\n", end=" ")
print(concat)


Concatenated DataFrame:
    A  B
0  1  3
1  2  4
0  5  7
1  6  8


In [23]:
# Pivot Table: one row per Department holding the mean Salary of its employees
df_pivot = pd.DataFrame(
    {
        'Department': ['HR', 'HR', 'IT', 'IT'],
        'Employee': ['Alice', 'Bob', 'Charlie', 'David'],
        'Salary': [50000, 52000, 60000, 65000],
    }
)
pivot = df_pivot.pivot_table(index='Department', values='Salary', aggfunc='mean')
print("\nPivot Table:\n", end=" ")
print(pivot)


Pivot Table:
              Salary
Department         
HR          51000.0
IT          62500.0
