In [None]:
import numpy as np

# Build a small 2x2 array so every slice result can be verified by eye.
x = np.array([[1, 2], [3, 4]])

print("Original array:")
print(x)

# Each entry pairs a label with the slice it describes. In every expression the
# leading ":" keeps all rows; only the column selector changes.
slicing_demos = [
    # -2: starts two columns from the end, i.e. it selects the LAST TWO columns
    # (not "the second column onwards"). For this 2-column array that is the
    # whole array, and the result stays 2-D.
    ("A) x[:, -2:] (last two columns)", x[:, -2:]),
    # A single integer index drops the column axis, so this is a 1-D array
    # holding only the last column.
    ("B) x[:, -1] (last column)", x[:, -1]),
    # 1:-1 excludes both the first and the last column. Nothing is left for a
    # 2-column array, so the result has shape (2, 0) and prints as [].
    ("C) x[:, 1: -1] (all elements except first and last columns)", x[:, 1:-1]),
    # Index 1 is the second column; for this array it is also the last one,
    # so the values match B.
    ("D) x[:, 1] (second column)", x[:, 1]),
]

for label, selected in slicing_demos:
    print("\n" + label)
    print(selected)


Original array:
[[1 2]
 [3 4]]

A) x[:, -2:] (all elements from second column onwards)
[[1 2]
 [3 4]]

B) x[:, -1] (last column)
[2 4]

C) x[:, 1: -1] (all elements except first and last columns)
[]

D) x[:, 1] (second column)
[2 4]


In [None]:
# 3x4 integer matrix used for the column-wise count below.
x = np.array([
    [5, 0, 3, 3],
    [7, 9, 3, 5],
    [2, 4, 7, 6],
])
print(x)

# (x < 6) is a boolean mask; summing it along axis 0 counts, for each column,
# how many entries are strictly less than 6.
below_six_per_column = (x < 6).sum(axis=0)
print(below_six_per_column)

[[5 0 3 3]
 [7 9 3 5]
 [2 4 7 6]]
[2 2 2 2]


***Hierarchical indexing***
Hierarchical indexing, also known as multi-level indexing, is a feature in pandas that enables you to manage and work with data in multiple dimensions. It allows you to create DataFrame or Series objects with more than one level of row or column labels, providing a way to organize and structure complex datasets.

**Syntax:**

```python
index = pd.MultiIndex.from_tuples(
    [('Level1', 'A'), ('Level1', 'B'), ('Level2', 'C'), ('Level2', 'D')],
    names=['First_Level', 'Second_Level']  # Optional: Names for the levels
)
```

In [None]:
import pandas as pd


# Rows are keyed by a two-level MultiIndex: 'Group' (outer) and 'Variable' (inner).
row_index = pd.MultiIndex.from_tuples(
    [
        ('Group1', 'A'),
        ('Group1', 'B'),
        ('Group2', 'A'),
        ('Group2', 'B'),
        ('Group2', 'C'),
    ],
    names=['Group', 'Variable'],
)

df = pd.DataFrame(
    {
        'A': [1, 2, 3, 4, 5],
        'B': [6, 7, 8, 9, 10],
        'C': [11, 12, 13, 14, 15],
    },
    index=row_index,
)

# Display the DataFrame
print("DataFrame with hierarchical index:")
print(df)

# Selecting on the outer level alone returns every row of that group, indexed
# by the remaining inner level.
print("\nData for 'Group1':")
print(df.loc['Group1'])

# The columns are also named 'A'/'B'/'C', so df.loc[:, 'A'] would pick the
# COLUMN 'A' rather than the rows whose 'Variable' level is 'A'. A
# cross-section on the inner level selects those rows across all groups.
print("\nData for 'Variable' A:")
print(df.xs('A', level='Variable'))

# Spell out the column axis so the tuple is unambiguously a full row key and
# cannot be read as (row label, column label).
print("\nData for 'Group1' and 'Variable' B:")
print(df.loc[('Group1', 'B'), :])


DataFrame with hierarchical index:
                 A   B   C
Group  Variable           
Group1 A         1   6  11
       B         2   7  12
Group2 A         3   8  13
       B         4   9  14
       C         5  10  15

Data for 'Group1':
          A  B   C
Variable          
A         1  6  11
B         2  7  12

Data for 'Variable' A:
Group   Variable
Group1  A           1
        B           2
Group2  A           3
        B           4
        C           5
Name: A, dtype: int64

Data for 'Group1' and 'Variable' B:
A     2
B     7
C    12
Name: (Group1, B), dtype: int64


A pivot table allows you to transform and aggregate data in a tabular format, providing insights into the relationships between different variables.

A pivot table typically involves:
1. Rows and Columns
2. Values
3. Aggregate Function
4. Filtering and Sorting
5. Display

**Syntax for pivot table:**

```python
pivot_df = df.pivot_table(
    index='row_column',        # Column(s) to use as the row index
    columns='column_column',   # Column(s) to use as the column index
    values='value_column',     # Column containing the values to be summarized
    aggfunc='aggregation_function',  # Aggregation function to be applied to the values
    fill_value=None,           # Optional: Value to replace missing values
    margins=False,             # Optional: Whether to include subtotal margins
    dropna=True                # Optional: Whether to exclude rows/columns with missing values
)
```

In [None]:
import pandas as pd

# Sample sales records, written one row per sale: (Date, Product, Sales, Region).
sales_columns = ['Date', 'Product', 'Sales', 'Region']
sales_rows = [
    ('2022-01-01', 'A', 100, 'North'),
    ('2022-01-01', 'B', 200, 'North'),
    ('2022-01-02', 'A', 150, 'South'),
    ('2022-01-02', 'B', 180, 'South'),
    ('2022-01-03', 'A', 120, 'North'),
]

# Transpose the rows into the column-oriented dict the DataFrame is built from.
data = {
    column: list(values)
    for column, values in zip(sales_columns, zip(*sales_rows))
}

df = pd.DataFrame(data)
print(df)


         Date Product  Sales Region
0  2022-01-01       A    100  North
1  2022-01-01       B    200  North
2  2022-01-02       A    150  South
3  2022-01-02       B    180  South
4  2022-01-03       A    120  North


In [None]:
# Total sales for each product (rows) broken down by region (columns).
pivot_df = pd.pivot_table(
    df,
    index='Product',
    columns='Region',
    values='Sales',
    aggfunc='sum',
)
print(pivot_df)


Region   North  South
Product              
A          220    150
B          200    180


In [None]:
# Average sale amount for each product (rows) broken down by region (columns).
pivot_mean = pd.pivot_table(
    df,
    index='Product',
    columns='Region',
    values='Sales',
    aggfunc='mean',
)
print("Mean (average) sales:")
print(pivot_mean)


Mean (average) sales:
Region   North  South
Product              
A          110    150
B          200    180


In [None]:
# Smallest single sale for each product (rows) broken down by region (columns).
pivot_min = pd.pivot_table(
    df,
    index='Product',
    columns='Region',
    values='Sales',
    aggfunc='min',
)
print("\nMinimum sales:")
print(pivot_min)



Minimum sales:
Region   North  South
Product              
A          100    150
B          200    180


In [None]:
# Largest single sale for each product (rows) broken down by region (columns).
pivot_max = pd.pivot_table(
    df,
    index='Product',
    columns='Region',
    values='Sales',
    aggfunc='max',
)
print("\nMaximum sales:")
print(pivot_max)



Maximum sales:
Region   North  South
Product              
A          120    150
B          200    180


In [None]:
import pandas as pd


# One row per (student, subject) result.
data = {
    'Student': ['raja', 'kumar', 'raja', 'kumar', 'raja', 'kumar'],
    'Subject': ['DS', 'DS', 'DAA', 'DAA', 'TOC', 'TOC'],
    'Score': [85, 90, 88, 92, 75, 80],
    'Grade': ['A', 'A', 'A', 'A', 'B', 'B']
}

# Create DataFrame
df = pd.DataFrame(data)
print(df)

# 1. Average score for each student in each subject
pivot_avg_score = df.pivot_table(
    index='Student', columns='Subject', values='Score', aggfunc='mean'
)
print("1. Average score for each student in each subject:")
print(pivot_avg_score)

# 2. Maximum score achieved by each student in each subject
pivot_max_score = df.pivot_table(
    index='Student', columns='Subject', values='Score', aggfunc='max'
)
print("\n2. Maximum score achieved by each student in each subject:")
print(pivot_max_score)

# 3. Number of subjects each student has taken.
# 'nunique' counts DISTINCT subjects; 'count' would count rows and therefore
# overcount if a student ever had more than one row for the same subject.
pivot_subject_count = df.pivot_table(
    index='Student', values='Subject', aggfunc='nunique'
)
print("\n3. Number of subjects each student has taken:")
print(pivot_subject_count)

# 4. Average score for each grade in each subject.
# Grade/subject combinations with no rows come out as NaN.
pivot_avg_score_grade = df.pivot_table(
    index='Grade', columns='Subject', values='Score', aggfunc='mean'
)
print("\n4. Average score for each grade in each subject:")
print(pivot_avg_score_grade)


  Student Subject  Score Grade
0    raja      DS     85     A
1   kumar      DS     90     A
2    raja     DAA     88     A
3   kumar     DAA     92     A
4    raja     TOC     75     B
5   kumar     TOC     80     B
1. Average score for each student in each subject:
Subject  DAA  DS  TOC
Student              
kumar     92  90   80
raja      88  85   75

2. Maximum score achieved by each student in each subject:
Subject  DAA  DS  TOC
Student              
kumar     92  90   80
raja      88  85   75

3. Number of subjects each student has taken:
         Subject
Student         
kumar          3
raja           3

4. Average score for each grade in each subject:
Subject   DAA    DS   TOC
Grade                    
A        90.0  87.5   NaN
B         NaN   NaN  77.5
