### NumPy and Pandas assignment

In [134]:
import numpy as np
import pandas as pd

# Task 1: NumPy Basics

1. Array creation

In [135]:
# Build a one-dimensional NumPy array holding the integers 4 through 8
# and display it.
arr_1d = np.array(range(4, 9))
print("1D Array:", arr_1d)

1D Array: [4 5 6 7 8]


In [136]:
# Build a 2x3 NumPy array from nested lists (one inner list per row)
# and display it.
arr_2d = np.array(
    [
        [10, 20, 30],
        [40, 50, 60],
    ]
)
print("2D Array:\n", arr_2d)

2D Array:
 [[10 20 30]
 [40 50 60]]


2. Array operations

In [137]:
# Element-wise arithmetic with a scalar: the scalar is applied to every
# element of the array.

# Add 5 to each element of the 1D array.
arr_1d_add = 5 + arr_1d
print("1D Array + 5:", arr_1d_add)

# Multiply each element of the 2D array by 3.
arr_2d_mult = 3 * arr_2d
print("2D Array * 3:\n", arr_2d_mult)

1D Array + 5: [ 9 10 11 12 13]
2D Array * 3:
 [[ 30  60  90]
 [120 150 180]]


3. Indexing and Slicing

In [138]:
# Indexing picks a single element; slicing with ':' keeps a whole axis.
first_element = arr_1d[0]        # position 0 of the 1D array
first_row = arr_2d[0, :]         # row 0, every column of the 2D array

print("First element of 1D array:", first_element)
print("First row of 2D array:\n", first_row)

First element of 1D array: 4
First row of 2D array:
 [10 20 30]


4. Shape and Reshape

In [139]:
# Inspect the array's dimensions via .shape, then lay the same six values
# out as a single column (6 rows x 1 column) with reshape.
reshaped_arr = arr_2d.reshape(6, 1)
print("Shape of 2D array:", arr_2d.shape)
print("Reshaped 2D Array:\n", reshaped_arr)

Shape of 2D array: (2, 3)
Reshaped 2D Array:
 [[10]
 [20]
 [30]
 [40]
 [50]
 [60]]


# Task 2: Pandas Basics

1. Series and DataFrame Creation

In [140]:
# Build a Series from a list of integers (no explicit index is supplied)
# and a DataFrame with three columns, one list of values per column.
s = pd.Series(data=[6, 7, 8, 9, 10])
print("Pandas Series:\n", s)

df = pd.DataFrame(dict(
    A=[11, 12, 10],
    B=[14, 15, 16],
    C=[17, 18, 19],
))
print("Pandas DataFrame:\n", df)

Pandas Series:
 0     6
1     7
2     8
3     9
4    10
dtype: int64
Pandas DataFrame:
     A   B   C
0  11  14  17
1  12  15  18
2  10  16  19


2. Reading Data

In [141]:
# Load a CSV file into a DataFrame. The path is relative, so
# 'sample_data.csv' must exist in the current working directory
# (ensure a sample CSV file is present in your repository).
csv_path = 'sample_data.csv'
df_read = pd.read_csv(csv_path)
print("Read DataFrame from CSV:\n", df_read)

Read DataFrame from CSV:
     A   B   C
0  11  14  17
1  12  15  18
2  13  16  19


3. Data Inspection


In [142]:
# Perform basic data inspection methods
# head(): the first rows of the frame.
print("Head of DataFrame:\n", df.head())

# info() prints its report itself and returns None, so it is called as a
# plain statement after its label instead of being passed to print()
# (which would show the label after the report, followed by "None").
print("Info of DataFrame:")
df.info()

# describe(): summary statistics (count, mean, std, min, quartiles, max).
print("Description of DataFrame:\n", df.describe())

Head of DataFrame:
     A   B   C
0  11  14  17
1  12  15  18
2  10  16  19
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   A       3 non-null      int64
 1   B       3 non-null      int64
 2   C       3 non-null      int64
dtypes: int64(3)
memory usage: 204.0 bytes
Info of DataFrame:
 None
Description of DataFrame:
           A     B     C
count   3.0   3.0   3.0
mean   11.0  15.0  18.0
std     1.0   1.0   1.0
min    10.0  14.0  17.0
25%    10.5  14.5  17.5
50%    11.0  15.0  18.0
75%    11.5  15.5  18.5
max    12.0  16.0  19.0


4. Data Manipulation

In [143]:
# Selecting Data
# A single column label yields a Series; a list of labels yields a DataFrame.
print("Column A:\n", df.loc[:, 'A'])
print("Columns A and B:\n", df.loc[:, ['A', 'B']])

Column A:
 0    11
1    12
2    10
Name: A, dtype: int64
Columns A and B:
     A   B
0  11  14
1  12  15
2  10  16


In [144]:
# Filtering Data
# Keep only the rows where the boolean condition on column 'A' is True.
filtered_df = df.loc[df['A'] > 1]
print("Filtered DataFrame (A > 1):\n", filtered_df)

Filtered DataFrame (A > 1):
     A   B   C
0  11  14  17
1  12  15  18
2  10  16  19


In [145]:
# Adding/Removing Columns
# Add column 'D' as the row-wise sum of columns 'A' and 'C'.
a_plus_c = df['A'] + df['C']
df['D'] = a_plus_c
print("DataFrame with new column D:\n", df)

# Remove 'D' again, modifying `df` in place, so the frame ends up with
# the same columns it started with.
df.drop(columns='D', inplace=True)
print("DataFrame after dropping column D:\n", df)

DataFrame with new column D:
     A   B   C   D
0  11  14  17  28
1  12  15  18  30
2  10  16  19  29
DataFrame after dropping column D:
     A   B   C
0  11  14  17
1  12  15  18
2  10  16  19


In [146]:
# Handling Missing Data
# Work on a separate frame whose column 'A' is float64 so that it can hold
# NaN. Assigning NaN straight into an int64 column relies on silent
# upcasting, which recent pandas versions deprecate; casting explicitly
# first avoids that. astype() returns a new DataFrame, so `df` itself is
# not modified.
df_with_nan = df.astype({'A': 'float64'})

# Inject a missing value at row label 1, column 'A'.
df_with_nan.loc[1, 'A'] = np.nan
print("DataFrame with NaN:\n", df_with_nan)

# dropna() removes rows containing NaN; fillna(0) replaces NaN with 0.
# Both return new frames and leave df_with_nan unchanged.
print("DataFrame after dropping NaN:\n", df_with_nan.dropna())
print("DataFrame after filling NaN with 0:\n", df_with_nan.fillna(0))

DataFrame with NaN:
       A   B   C
0  11.0  14  17
1   NaN  15  18
2  10.0  16  19
DataFrame after dropping NaN:
       A   B   C
0  11.0  14  17
2  10.0  16  19
DataFrame after filling NaN with 0:
       A   B   C
0  11.0  14  17
1   0.0  15  18
2  10.0  16  19


# Task 3: Data Analysis with Pandas

In [147]:
# Aggregations
# Reduce column 'C' to single values: its arithmetic mean and its total.
column_c = df['C']
print("Mean of column C:", column_c.mean())
print("Sum of column C:", column_c.sum())

Mean of column C: 18.0
Sum of column C: 54


In [148]:
# Merging and Joining
# Two frames that share a 'key' column; keys 'A' and 'B' appear in both,
# 'C' only in df1 and 'D' only in df2.
df1 = pd.DataFrame({
    'key': ['A', 'B', 'C'],
    'value': [1, 2, 3],
})
df2 = pd.DataFrame({
    'key': ['A', 'B', 'D'],
    'value': [4, 5, 6],
})

# Inner join on 'key': only keys present in both frames are kept.
merged_df = df1.merge(df2, on='key', how='inner')
print("Merged DataFrame:\n", merged_df)

Merged DataFrame:
   key  value_x  value_y
0   A        1        4
1   B        2        5


In [149]:
# Pivot Tables
# Rows are the distinct values of 'A', columns the distinct values of 'B',
# and each cell is the mean of 'C' for that (A, B) pair.
pivot_df = pd.pivot_table(
    df,
    values='C',
    index='A',
    columns='B',
    aggfunc='mean',
)
print("Pivot Table:\n", pivot_df)

Pivot Table:
 B     14    15    16
A                   
10   NaN   NaN  19.0
11  17.0   NaN   NaN
12   NaN  18.0   NaN


In [150]:
# Time Series Analysis
# Give every row a consecutive calendar day starting at 2020-01-01, then
# promote that column to the index. Both steps modify `df` in place.
daily_dates = pd.date_range(start='1/1/2020', periods=len(df), freq='D')
df['date'] = daily_dates
df.set_index('date', inplace=True)
print("DataFrame with Date Index:\n", df)

# Group the daily rows into monthly bins and average each bin.
# NOTE(review): newer pandas releases (2.2+) reportedly deprecate the 'M'
# alias in favour of 'ME' — confirm against the installed version.
monthly_bins = df.resample('M')
resampled_df = monthly_bins.mean()
print("Resampled DataFrame:\n", resampled_df)

DataFrame with Date Index:
              A   B   C
date                  
2020-01-01  11  14  17
2020-01-02  12  15  18
2020-01-03  10  16  19
Resampled DataFrame:
                A     B     C
date                        
2020-01-31  11.0  15.0  18.0
