# **📦Importing libraries**

In [73]:
import pandas as pd

In [20]:
import numpy as np

## **Loading the data**

In [22]:
data = pd.read_csv('./IMDb_All_Genres_etf_clean1.csv')
data.head()

Unnamed: 0,Movie_Title,Year,Director,Actors,Rating,Runtime(Mins),Censor,Total_Gross,main_genre,side_genre
0,Kantara,2022,Rishab Shetty,"Rishab Shetty, Sapthami Gowda, Kishore Kumar G...",9.3,148,UA,Gross Unkown,Action,"Adventure, Drama"
1,The Dark Knight,2008,Christopher Nolan,"Christian Bale, Heath Ledger, Aaron Eckhart, M...",9.0,152,UA,$534.86M,Action,"Crime, Drama"
2,The Lord of the Rings: The Return of the King,2003,Peter Jackson,"Elijah Wood, Viggo Mortensen, Ian McKellen, Or...",9.0,201,U,$377.85M,Action,"Adventure, Drama"
3,Inception,2010,Christopher Nolan,"Leonardo DiCaprio, Joseph Gordon-Levitt, Ellio...",8.8,148,UA,$292.58M,Action,"Adventure, Sci-Fi"
4,The Lord of the Rings: The Two Towers,2002,Peter Jackson,"Elijah Wood, Ian McKellen, Viggo Mortensen, Or...",8.8,179,UA,$342.55M,Action,"Adventure, Drama"


### **_Selecting the rating column and converting into array_**

In [12]:
ratings = data['Rating'].values  # Convert to NumPy array


### **Appling _Shape, size, ndim, dtype_ on rating array**

In [79]:
print("Shape:", ratings.shape)
print("Size:", ratings.size)
print("Dimensions (ndim):", ratings.ndim)
print("Data type (dtype):", ratings.dtype)



Shape: (5,)
Size: 5
Dimensions (ndim): 1
Data type (dtype): float64


### **Appling _max, min, argmax, argmin_ on rating array**

In [26]:
print("Max Rating:", np.max(ratings))
print("Min Rating:", np.min(ratings))
print("Index of Max Rating (argmax):", np.argmax(ratings))
print("Index of Min Rating (argmin):", np.argmin(ratings))

Max Rating: 9.3
Min Rating: 1.0
Index of Max Rating (argmax): 0
Index of Min Rating (argmin): 1928


### **Applying _reshape_ on array** 

In [86]:
ratings = np.array([7.8, 8.1, 6.3, 9.0, 5.6])  # 1D array

reshaped = ratings.reshape(-1, 1)  # Reshape to 2D column vector

print(reshaped)

[[7.8]
 [8.1]
 [6.3]
 [9. ]
 [5.6]]


## **Checking dimension of complete dataset using _ndim_**

In [30]:
print("Dimensions of the dataset:", data.ndim)

Dimensions of the dataset: 2


## **Checking datatype of all features using _dtypes_**

In [92]:
print("datatype of all data:", data.dtypes)


datatype of all data: Movie_Title       object
Year               int64
Director          object
Actors            object
Rating           float64
Runtime(Mins)      int64
Censor            object
Total_Gross       object
main_genre        object
side_genre        object
dtype: object


# **Selecting numeric data**

In [96]:
numeric_data = data.select_dtypes(include=np.number)
print(numeric_data.head())

   Year  Rating  Runtime(Mins)
0  2022     9.3            148
1  2008     9.0            152
2  2003     9.0            201
3  2010     8.8            148
4  2002     8.8            179


##  **_📈 Copying Arrays using .copy()_**

In [106]:
ratings_copy = numeric_data["Rating"].copy()
print("Copied Ratings:\n", ratings_copy)

Copied Ratings:
 0       9.3
1       9.0
2       9.0
3       8.8
4       8.8
       ... 
5557    1.9
5558    1.9
5559    1.9
5560    1.5
5561    1.0
Name: Rating, Length: 5562, dtype: float64


### **➕Appending and Inserting using .append() & .insert()**

In [111]:
ratings_appended = np.append(ratings, 9.5)
print("After Append:\n", ratings_appended)

ratings_inserted = np.insert(ratings, 0, 8.0)
print("After Insert at index 0:\n", ratings_inserted[:5])


After Append:
 [9.3 9.  9.  ... 1.5 1.  9.5]
After Insert at index 0:
 [8.  9.3 9.  9.  8.8]


## **🔢 Sorting using _.sort()_**


In [114]:
ratings_sorted = np.sort(ratings)
print("Sorted Ratings:\n", ratings_sorted)



Sorted Ratings:
 [1.  1.  1.1 ... 9.2 9.3 9.3]


## **❌ Removing / Deleting**


In [117]:

ratings_deleted = np.delete(ratings, 0)
print("After Deleting Index 0:\n", ratings_deleted[:5])


After Deleting Index 0:
 [9.  9.  8.8 8.8 8.8]


## **🔗 Combining / Concatenating**

In [122]:
runtime = numeric_data['Runtime(Mins)'].values
combined = np.concatenate((ratings, runtime))
print("Combined Ratings + Runtime (first 10):\n", combined[:10])


Combined Ratings + Runtime (first 10):
 [9.3 9.  9.  8.8 8.8 8.8 8.7 8.7 8.6 8.6]



### **✂️ Splitting**

In [129]:
split_data = np.array_split(ratings, 3)
print("Splitted Data (Part 1):\n", split_data[0])



Splitted Data (Part 1):
 0       9.3
1       9.0
2       9.0
3       8.8
4       8.8
       ... 
1849    6.8
1850    6.8
1851    6.8
1852    6.8
1853    6.8
Name: Rating, Length: 1854, dtype: float64


  return bound(*args, **kwds)


## **🎯 Indexing and Logical Selection**

In [132]:
high_ratings = ratings[ratings > 8.5]
print("High Ratings > 8.5:\n", high_ratings)


High Ratings > 8.5:
 0       9.3
1       9.0
2       9.0
3       8.8
4       8.8
5       8.8
6       8.7
7       8.7
8       8.6
9       8.6
10      8.6
11      8.6
1550    9.0
1551    8.9
1552    8.7
1929    8.6
2186    8.8
2253    9.2
2254    9.0
2255    9.0
2256    9.0
2257    8.9
2258    8.9
2259    8.6
2260    8.6
2261    8.6
2262    8.6
3448    9.3
3449    9.2
3450    8.9
3451    8.8
3452    8.8
3453    8.7
3454    8.7
3455    8.6
3456    8.6
3457    8.6
3458    8.6
Name: Rating, dtype: float64


## **📤 Broadcasting**

In [135]:
ratings_plus_one = ratings + 1
print("Ratings + 1 (Broadcasted):\n", ratings_plus_one[:5])


Ratings + 1 (Broadcasted):
 0    10.3
1    10.0
2    10.0
3     9.8
4     9.8
Name: Rating, dtype: float64


## **🔄 Type Casting**

In [138]:
ratings_int = ratings.astype(int)
print("Ratings as Integers:\n", ratings_int[:5])


Ratings as Integers:
 0    9
1    9
2    9
3    8
4    8
Name: Rating, dtype: int32


## **➕➖ Arithmetic Operations**

In [141]:
add = ratings + 1
subtract = ratings - 1
multiply = ratings * 2
divide = ratings / 2
exponent = ratings ** 2

print("Add +1:\n", add[:5])
print("Subtract -1:\n", subtract[:5])
print("Multiply *2:\n", multiply[:5])
print("Divide /2:\n", divide[:5])
print("Exponent **2:\n", exponent[:5])


Add +1:
 0    10.3
1    10.0
2    10.0
3     9.8
4     9.8
Name: Rating, dtype: float64
Subtract -1:
 0    8.3
1    8.0
2    8.0
3    7.8
4    7.8
Name: Rating, dtype: float64
Multiply *2:
 0    18.6
1    18.0
2    18.0
3    17.6
4    17.6
Name: Rating, dtype: float64
Divide /2:
 0    4.65
1    4.50
2    4.50
3    4.40
4    4.40
Name: Rating, dtype: float64
Exponent **2:
 0    86.49
1    81.00
2    81.00
3    77.44
4    77.44
Name: Rating, dtype: float64


## **🧠 Universal Array Functions**

In [144]:
square_root = np.sqrt(ratings)
exponential = np.exp(ratings)
max_value = np.max(ratings)
sin_values = np.sin(ratings)

print("Square Root:\n", square_root[:5])
print("Exponential:\n", exponential[:5])
print("Max Value in Ratings:\n", max_value)
print("Sin Values:\n", sin_values[:5])


Square Root:
 0    3.049590
1    3.000000
2    3.000000
3    2.966479
4    2.966479
Name: Rating, dtype: float64
Exponential:
 0    10938.019208
1     8103.083928
2     8103.083928
3     6634.244006
4     6634.244006
Name: Rating, dtype: float64
Max Value in Ratings:
 9.3
Sin Values:
 0    0.124454
1    0.412118
2    0.412118
3    0.584917
4    0.584917
Name: Rating, dtype: float64


# **📊 Final Summary**

In [147]:
print("Shape of Numeric Data:", numeric_data.shape)
print("Data Types:\n", numeric_data.dtypes)
print("Max per Column:\n", numeric_data.max())
print("Min per Column:\n", numeric_data.min())
print("Index of Max Rating:\n", data['Rating'].idxmax())


Shape of Numeric Data: (5562, 3)
Data Types:
 Year               int64
Rating           float64
Runtime(Mins)      int64
dtype: object
Max per Column:
 Year             2022.0
Rating              9.3
Runtime(Mins)     321.0
dtype: float64
Min per Column:
 Year             1920.0
Rating              1.0
Runtime(Mins)      45.0
dtype: float64
Index of Max Rating:
 0
