# Numpy and Pandas assignment

In [2]:
import numpy as np
import pandas as pd

# Task 1: NumPy Basics

## 1. Array Creation

- ### Create a 1D and a 2D NumPy array

In [7]:
# Creating and assignming a 1D NumPy array
arr_1d = np.array([1,2,3,4,5]) 
print("1D Array:\n", arr_1d)

1D Array:
 [1 2 3 4 5]


In [6]:
# Creating and assignming a 2D NumPy array
arr_2d = np.array([[1,2,3],[4,5,6]])
print("2D Array:\n",arr_2d)

2D Array:
 [[1 2 3]
 [4 5 6]]


## 2. Array Operations

In [19]:
# Perform basic arithmetic operations on the arrays
arr_1d_add = arr_1d + 10 # adding 1D NumPy array  with 10
arr_2d_mult = arr_2d * 2 # Multiplying 2D NumPy array by 2

print("1D Array + 10:\n", arr_1d_add)
print("\n2D Array * 2:\n", arr_2d_mult)

1D Array + 10:
 [11 12 13 14 15]

2D Array * 2:
 [[ 2  4  6]
 [ 8 10 12]]


## 3. Indexing and Slicing

In [13]:
# Demonstrate indexing and slicing on the arryas
print("First element of 1d array:\n", arr_1d[0])
print("\nFirst row of 2D arrya:\n",arr_2d[0, :])

First element of 1d array:
 1

First row of 2D arrya:
 [1 2 3]


## 4.Shape and Reshape

In [17]:
print("2D Array:\n",arr_2d)

2D Array:
 [[1 2 3]
 [4 5 6]]


In [18]:
# Show how to check the shape of an array and reshape it
print("Shape of 2D array:", arr_2d.shape)
reshaped_arr = arr_2d.reshape((3,2))
print("Reshaped 2D Array:\n", reshaped_arr)

Shape of 2D array: (2, 3)
Reshaped 2D Array:
 [[1 2]
 [3 4]
 [5 6]]


# Task 2: Pandas Basics

## 1. Series and DataFrame Creation

In [23]:
# Create a Pandas Series and DataFrame
s = pd.Series([1,2,3,4,5])
print("Pandas Series:\n", s)

df = pd.DataFrame({
    'A': [1, 2, 3],
    'B': [4, 5, 6],
    'C': [7, 8, 9]
})
print("Pandas DataFrame:\n", df)

Pandas Series:
 0    1
1    2
2    3
3    4
4    5
dtype: int64
Pandas DataFrame:
    A  B  C
0  1  4  7
1  2  5  8
2  3  6  9


## 2. Reading Data

In [26]:
# Read data from a CVS file (ensure you have a sample CSV file in your repository)
df_read = pd.read_csv('sample_data.csv')
print("Read DataFrame from CSV:\n", df_read)

Read DataFrame from CSV:
    A  B  C
0  1  4  7
1  2  5  8
2  3  6  9


## 3. Data Inspection

In [28]:
# perform basic data inspection methods
print("Head of DataFrame:\n", df.head())
print("\nInfo of DataFrame:\n", df.info())
print("\nDescription of DataFrame:\n", df.describe())

Head of DataFrame:
    A  B  C
0  1  4  7
1  2  5  8
2  3  6  9
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   A       3 non-null      int64
 1   B       3 non-null      int64
 2   C       3 non-null      int64
dtypes: int64(3)
memory usage: 204.0 bytes

Info of DataFrame:
 None

Description of DataFrame:
          A    B    C
count  3.0  3.0  3.0
mean   2.0  5.0  8.0
std    1.0  1.0  1.0
min    1.0  4.0  7.0
25%    1.5  4.5  7.5
50%    2.0  5.0  8.0
75%    2.5  5.5  8.5
max    3.0  6.0  9.0


## 4. Data Manipulation

In [29]:
# Selecting Data
print("Column A:\n", df['A'])
print("Columns A and B:\n", df[['A', 'B']])

Column A:
 0    1
1    2
2    3
Name: A, dtype: int64
Columns A and B:
    A  B
0  1  4
1  2  5
2  3  6


In [31]:
# Filtering Data
filtered_df = df[df['A'] > 1]
print("Filtered DataFrame (A > 1):\n", filtered_df)

Filtered DataFrame (A > 1):
    A  B  C
1  2  5  8
2  3  6  9


In [33]:
# Adding/Removing Columns
df['D'] = df['A'] + df['B']
print("DataFrame with new column D:\n", df)
df.drop('D', axis=1, inplace=True)
print("\nDataFrame after dropping column D:\n", df)

DataFrame with new column D:
    A  B  C  D
0  1  4  7  5
1  2  5  8  7
2  3  6  9  9

DataFrame after dropping column D:
    A  B  C
0  1  4  7
1  2  5  8
2  3  6  9


In [36]:
# Handling Missing Data
df_with_nan = df.copy()
df_with_nan.loc[1, 'B'] = np.nan
print("DataFrame with NaN:\n", df_with_nan)
print("\nDataFrame after dropping NaN:\n", df_with_nan.dropna())
print("\nDataFrame after filling NaN with 0:\n", df_with_nan.fillna(0))

DataFrame with NaN:
    A    B  C
0  1  4.0  7
1  2  NaN  8
2  3  6.0  9

DataFrame after dropping NaN:
    A    B  C
0  1  4.0  7
2  3  6.0  9

DataFrame after filling NaN with 0:
    A    B  C
0  1  4.0  7
1  2  0.0  8
2  3  6.0  9


# Task 3: Data Analysis with Pandas

In [39]:
# Aggregations
print("Mean of column A:", df['A'].mean())
print("Sum of column A:", df['A'].sum())

Mean of column A: 2.0
Sum of column A: 6


In [40]:
# Merging and Joining
df1 = pd.DataFrame({'key': ['A', 'B', 'C'], 'value': [1, 2, 3]})
df2 = pd.DataFrame({'key': ['A', 'B', 'D'], 'value': [4, 5, 6]})
merged_df = pd.merge(df1, df2, on='key', how='inner')
print("Merged DataFrame:\n", merged_df)

Merged DataFrame:
   key  value_x  value_y
0   A        1        4
1   B        2        5


In [41]:
# Pivot Tables
pivot_df = df.pivot_table(values='C', index='A', columns='B', aggfunc='mean')
print("Pivot Table:\n", pivot_df)

Pivot Table:
 B    4    5    6
A               
1  7.0  NaN  NaN
2  NaN  8.0  NaN
3  NaN  NaN  9.0


In [42]:
# Time Series Analysis
df['date'] = pd.date_range(start='1/1/2020', periods=len(df), freq='D')
df.set_index('date',inplace=True)
print("DataFrame with Date Index:\n", df)
resampled_df = df.resample('M').mean()
print("Resampled DataFrame:\n", resampled_df)

DataFrame with Date Index:
             A  B  C
date               
2020-01-01  1  4  7
2020-01-02  2  5  8
2020-01-03  3  6  9
Resampled DataFrame:
               A    B    C
date                     
2020-01-31  2.0  5.0  8.0
