### Handling CSV file

In [3]:
data = """
name,age
shaukat,22
hamze,23
"""

with open("data.csv", "w") as file:
    file.write(data)

with open("data.csv", "r") as file:
    contents = file.read()

print(data)
print(contents)


name,age
shaukat,22
hamze,23


name,age
shaukat,22
hamze,23



In [4]:
import pandas as pd

data_df = pd.read_csv("data.csv")
data_df

Unnamed: 0,name,age
0,shaukat,22
1,hamze,23


In [6]:
data_df.to_csv("data")

### Handling JSON

In [5]:
data_df.to_json("data.json")

In [7]:
data_json = pd.read_json("data.json")
data_json

Unnamed: 0,name,age
0,shaukat,22
1,hamze,23


### Help Func

In [9]:
help(data_df.to_json)

Help on method to_json in module pandas.core.generic:

to_json(path_or_buf: 'FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None' = None, *, orient: "Literal['split', 'records', 'index', 'table', 'columns', 'values'] | None" = None, date_format: 'str | None' = None, double_precision: 'int' = 10, force_ascii: 'bool_t' = True, date_unit: 'TimeUnit' = 'ms', default_handler: 'Callable[[Any], JSONSerializable] | None' = None, lines: 'bool_t' = False, compression: 'CompressionOptions' = 'infer', index: 'bool_t | None' = None, indent: 'int | None' = None, storage_options: 'StorageOptions | None' = None, mode: "Literal['a', 'w']" = 'w') -> 'str | None' method of pandas.core.frame.DataFrame instance
    Convert the object to a JSON string.

    Note NaN's and None will be converted to null and datetime objects
    will be converted to UNIX timestamps.

    Parameters
    ----------
    path_or_buf : str, path object, file-like object, or None, default None
        String, path object (imple

In [10]:
help(open)

Help on function open in module _io:

open(file, mode='r', buffering=-1, encoding=None, errors=None, newline=None, closefd=True, opener=None)
    Open file and return a stream.  Raise OSError upon failure.

    file is either a text or byte string giving the name (and the path
    if the file isn't in the current working directory) of the file to
    be opened or an integer file descriptor of the file to be
    wrapped. (If a file descriptor is given, it is closed when the
    returned I/O object is closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file
    is opened. It defaults to 'r' which means open for reading in text
    mode.  Other common values are 'w' for writing (truncating the file if
    it already exists), 'x' for creating and writing to a new file, and
    'a' for appending (which on some Unix systems, means that all writes
    append to the end of the file regardless of the current seek position).
    In text m

### String IO

In [11]:
data

'\nname,age\nshaukat,22\nhamze,23\n'

In [12]:
# will cause error
pd.read_csv(data)

OSError: [Errno 22] Invalid argument: '\nname,age\nshaukat,22\nhamze,23\n'

In [14]:
# the StringIO converts the string to a file like obj. That we can directly treat like a file.
from io import StringIO

data_str = StringIO(data)
data_str_df = pd.read_csv(data_str)
data_str_df


Unnamed: 0,name,age
0,shaukat,22
1,hamze,23


### NAN

In [15]:
data_df

Unnamed: 0,name,age
0,shaukat,22
1,hamze,23


In [23]:
import numpy as np

# 1. way
print(f"Pd NA: {pd.NA}")
data_df.loc[1, "name"] = pd.NA

# 2. way
print(f"NP NA: {np.nan}")
data_df.loc[1, "name"] = np.nan

data_df

Pd NA: <NA>
NP NA: nan


Unnamed: 0,name,age
0,shaukat,22
1,,23


In [25]:
# both pd and np NANs are treated as nans in pandas

pd_na = pd.NA
np_nan = np.nan

print(pd.isna(pd_na), pd.isna(np_nan))

True True


#### Treating custom vals as NAN

In [26]:
sentinals = {"name": ["hamze", "NAN"]}

data_df_2 = pd.read_csv("data.csv", na_values=sentinals)
data_df_2

Unnamed: 0,name,age
0,shaukat,22
1,,23


#### Dropping NAN

In [31]:
# 1. dropping when all the vals of the row are nan
dropped = data_df_2.dropna(how="all")
print(data_df_2)

print("\n")

# 2. dropping when any vals of the row are nan
dropped = data_df_2.dropna(how="any")
print(dropped)

      name  age
0  shaukat   22
1      NaN   23


      name  age
0  shaukat   22


### Descriptive Stats

#### Funcs
- df.describe()
```For statistical info of numerical columns.```
- df.unique()
```For getting uniques values from a Series.```
- df.nunique()
```For getting num uniques values of each column of DataFrame.```

### Modifying data

In [39]:
data_df = pd.DataFrame([['a', 'b'], ['c', 'd'], ['e', 'f']], columns=['col2', 'col2'])
data_df

Unnamed: 0,col2,col2.1
0,a,b
1,c,d
2,e,f


In [42]:
data_df.iloc[2,] = ('ee', 'ff')
data_df

Unnamed: 0,col2,col2.1
0,a,b
1,c,d
2,ee,ff


In [43]:
# inserting a column
data_df

Unnamed: 0,col2,col2.1
0,a,b
1,c,d
2,ee,ff


In [44]:
data_df.insert(2, "col3", ['ab', 'cd', 'eeff'])
data_df

Unnamed: 0,col2,col2.1,col3
0,a,b,ab
1,c,d,cd
2,ee,ff,eeff


In [45]:
import pandas as pd

# Original DataFrame
df = pd.DataFrame({'col1': ['a', 'b'], 'col2': [1, 2]})

# Insert a row at index 1
new_row = {'col1': 'c', 'col2': 3}
df.loc[1.5] = new_row  # Assign to a new index (e.g., 1.5 for inserting between 1 and 2)

# Sort by index if needed
df = df.sort_index().reset_index(drop=True)
print(df)


  col1  col2
0    a     1
1    b     2
2    c     3


## To do
- insert a new_row
- boolean indexing using loc, iloc
- boolean masking (don't use loc and iloc)
- replace function
- interpolate func
- bar plot and modifying its visual
- other plots e.g. line, barh, pie, area