# Working with Data

### 1.  Loading Data Using `np.loadtxt()`




In [50]:
from pathlib import Path

import numpy as np

# Load space-separated numeric data from data.txt into a float array.
# If the file is not present (e.g. on a fresh checkout), write the small
# sample used in this tutorial first so the cell survives "Restart & Run All".
# An existing data.txt is never overwritten.
data_path = Path("data.txt")
if not data_path.exists():
    data_path.write_text("1.0 2.5 3.1\n4.2 5.3 6.4\n7.5 8.8 9.9\n")

# np.loadtxt splits on any whitespace by default, so no delimiter is needed.
data = np.loadtxt(data_path)
data

array([[1. , 2.5, 3.1],
       [4.2, 5.3, 6.4],
       [7.5, 8.8, 9.9]])

### 2.  Loading Data Using `np.genfromtxt()`
## -----------------------------------------------

#### `np.genfromtxt()` reads data while handling missing values.
#### `delimiter` specifies how values are separated (e.g., commas).
#### Missing entries are represented as `np.nan` in the array.


In [51]:
# Load comma-separated data from data_with_missing.txt; np.genfromtxt turns
# empty cells into np.nan when reading floats.
# If the file is not present (e.g. on a fresh checkout), write the small
# sample used in this tutorial first so the cell survives "Restart & Run All".
# An existing file is never overwritten.
missing_path = Path("data_with_missing.txt")
if not missing_path.exists():
    # The middle value of the second row is intentionally left empty.
    missing_path.write_text("1.0,2.5,3.1\n4.2,,6.4\n7.5,8.8,9.9\n")

data = np.genfromtxt(missing_path, delimiter=",")
data

array([[1. , 2.5, 3.1],
       [4.2, nan, 6.4],
       [7.5, 8.8, 9.9]])

### 3.  Saving Data Using `np.savetxt()` to a CSV File


In [52]:
# Build a small 3x3 float matrix to write out.
data = np.array(
    [
        [1.0, 2.5, 3.1],
        [4.2, 5.3, 6.4],
        [7.5, 8.8, 9.9],
    ]
)

# Write the matrix as comma-separated text. np.savetxt creates
# saved_data.csv if it does not exist yet (an existing file is overwritten).
np.savetxt(fname="saved_data.csv", X=data, delimiter=",")

### 4. Working with CSV and delimited files: Loading a CSV File


In [53]:
# Read the comma-separated file back into a float array.
data = np.loadtxt(fname="saved_data.csv", delimiter=",", dtype=float)
data

array([[1. , 2.5, 3.1],
       [4.2, 5.3, 6.4],
       [7.5, 8.8, 9.9]])

## Working with TSV Files

In [54]:
# Realistic-looking sample table: one row per employee, with the columns
# (Employee ID, Age, Salary in USD). It is assembled column by column here.
employee_data = np.column_stack(
    (
        [101, 102, 103, 104, 105],            # employee IDs
        [25, 30, 22, 28, 35],                 # ages
        [50000, 60000, 45000, 70000, 80000],  # salaries in USD
    )
)

In [55]:
# Write the employee table as tab-separated integers. As with the CSV
# example, the file is created in the current working directory.
np.savetxt(fname="employee_data.tsv", X=employee_data, fmt="%d", delimiter="\t")
print("Employee data saved to 'employee_data.tsv'.")

Employee data saved to 'employee_data.tsv'.


In [56]:
# Read the tab-separated file back, parsing every field as an integer.
loaded_employee_data = np.genfromtxt(
    fname="employee_data.tsv",
    dtype=int,
    delimiter="\t",
)
loaded_employee_data


array([[  101,    25, 50000],
       [  102,    30, 60000],
       [  103,    22, 45000],
       [  104,    28, 70000],
       [  105,    35, 80000]])

### 5.  Handling Missing Data with `np.nan`
#### This is useful for cleaning data before analysis.

In [57]:
# Sample matrix in which np.nan marks the missing entries.
data_with_nan = np.array(
    [
        [1.0, 2.5, np.nan],
        [4.2, np.nan, 6.4],
        [7.5, 8.8, 9.9],
    ]
)

# Boolean mask: True wherever a value is missing.
np.isnan(data_with_nan)

array([[False, False,  True],
       [False,  True, False],
       [False, False, False]])

In [58]:
# Replace every NaN with 0.0; the result is a new array and
# data_with_nan itself is left unchanged.
filled_data = np.nan_to_num(data_with_nan, copy=True, nan=0.0)
filled_data

array([[1. , 2.5, 0. ],
       [4.2, 0. , 6.4],
       [7.5, 8.8, 9.9]])