In [None]:
df["ProdCategory"].value_counts().plot(kind="barh")

In [None]:
df = pd.read_csv(csv_path, nrows=5, index_col="id", usecols=["id", "artist"])

In [None]:
cols_to_use = ['id', 'artist', 'title', 'medium', 'year', 'acquisitionYear', 
               'height', 'width', 'units']
df = pd.read_csv(csv_path, index_col='id', usecols=cols_to_use)

=> Indexing

In [None]:
# access row 1035 from "artist" column
df.loc[1035, "artist"]

In [None]:
# first row and first column
df.iloc[0, 0]

In [None]:
# first row and all columns
df.iloc[0, :]

In [None]:
# two rows and two columns
df.iloc[0:2, 0:2]  # 0 and 1, 0 and 1, 2 is not counted

In [None]:
df['width'].sort_values().head()

Here, to clean our data, we will select all non-numeric values and assign them a new data type called NaN (Not A Number) this means that our data will be separated, one goes for numeric values where we can perform our computations and other goes for garbage values which we can't use. Basically, those values are integers but technically they are not numeric, so computer does not know as of its naming, so what we will do is, when we convert them to NaN, it will skip those values. In other words, we dont count them in our operations

In [None]:
pd.to_numeric(df['width'], errors="coerce")

In [None]:
df.loc[:, 'width'] = pd.to_numeric(df['width'], errors="coerce")

In [None]:
area = df['height'] * df['width'] # as NaN removed we can multiply both columns, read again above to understand what is going on

In [None]:
# assign() is used to add a new column to our data frame
df = df.assign(area=area)

In [None]:
df['area'].max()

In [None]:
df['area'].idxmax()   # returns the index of max value in this column

In [None]:
df.loc[df['area'].idxmax(), :]
# it will take idxmax() position number as row index, and all others for column, then output the result
# basically we will get all the column details for specific row number

=> Filtering

In [8]:
# lets find out how many artworks belong to Blake William
# first approach
blake_william = df['artist'] == "Blake, William"
blake_william.value_counts()

In [None]:
# second approach
artist_count = df['artist'].value_counts()
artist_count['Jones, George']

=> excel & csv

In [None]:
# reading data from excel file
sales = pd.read_excel(excel_path)
sales = pd.read_excel(excel_path, sheet_name='sales', header=None)

In [None]:
# to save this xlsx file to csv file
sales.to_csv('sales2.csv')
sales.to_excel('sales2.xlsx')

In [None]:
titanic.describe(include = 'O')

In [None]:
titanic.Survived.replace(to_replace=["yes", "no"], value=[1, 0], inplace=True)
# new version
titanic['Survived'] = titanic['Survived'].replace(to_replace=['yes', 'no'], value=[1, 0])

In [None]:
# changing the name of a column
summer.rename(columns={'Athlete': 'Athlete_Name'}, inplace=True)

In [None]:
pd.to_numeric(titanic.Fare)
# when these is a string like $75.0 pandas cant recognize this
titanic.Fare.str.replace('$', '')

In [None]:
summer.Athlete_Name.str.title() # here it capitilized their name to be more consice
summer.Athlete_Name.str.strip()
summer.loc[summer.Athlete_Name == 'PHELPS, Michael'].head(3)

In [None]:
pd.to_numeric(titanic.Fare)
titanic['Fare'] = titanic.Fare.astype('float')
titanic['Survived'] = titanic.Survived.astype('int')
titanic['Age'] = titanic.Age.astype('float')

=> NA value Detection

In [None]:
titanic.isna()
titanic.isna().sum()
titanic.isna().sum(axis=0)
titanic.isna().any()
titanic.isna().all()
titanic[titanic.isna().any(axis=1)]

In [None]:
titanic.notna()  # opposite for isna()
titanic.notna().sum()
titanic.notna().sum(axis=0)
titanic.notna().all()
titanic.notna().any()

In [None]:
titanic.duplicated().sum()

In [None]:
titanic.drop_duplicates(inplace=True)

In [None]:
titanic.dropna(inplace=True)

In [None]:
plt.figure(figsize=(6, 6))
sns.heatmap(titanic.notna())

In [None]:
sns.heatmap(titanic.isna())

In [None]:
titanic.Age.mean(skipna=True)

In [None]:
titanic['Age'] = titanic['Age'].fillna(titanic.Age.mean(skipna=True))

In [None]:
most_freq = titanic['Cabin'].value_counts().idxmax()

In [None]:
titanic['Cabin'] = titanic['Cabin'].fillna(most_freq)

=> Data Inspection

In [None]:
titanic.size # number of rows * number of columns

In [None]:
# you can set max display row options and min display options for better reading
pd.options.display.max_rows = 100
pd.options.display.min_rows = 30
# by default they are:
pd.options.display.max_rows = 60 # 10
pd.options.display.min_rows = None

In [None]:
titanic.min(numeric_only=True)
titanic.max(numeric_only=True)
titanic.mean(numeric_only=True)
titanic.mean(numeric_only=True).sort_values()

In [None]:
titanic.sort_values(by = 'Age', ascending=False)

In [21]:
# you can set index_col to string and use loc['string'] to get that data
titanic = pd.read_csv(csv_path, index_col='Name')

In [None]:
titanic.loc['Barkworth, Mr. Algernon Henry Wilson']
# this works not only for columns titanic.loc[:, 'Names']
# reindex() is used with time series

=> Pandas Series

In [None]:
age = titanic.Age # created a pandas series from pandas dataframe

In [None]:
age.to_frame().info()

In [None]:
age.to_frame() # names is index column, so only Age column is counted

In [None]:
age.sum(skipna=True)

In [None]:
sum(age) # wrong because Nan, cannot handle missing values

In [None]:
age.meadian(skipna=True)

In [None]:
len(age.unique()) # to get the number of unique values
# dropna = True, because in len() NAs are also counted
age.nunique(dropna=True)

In [None]:
age.value_counts(sort=True, dropna=True, ascending=False, normalize=True)
# here in percentage results are give, 42% of passangers were aged 24 and so on...

In [None]:
people = titanic.people
for person in people:
    print(person)

In [None]:
titanic.people.value_counts()

In [None]:
titanic.columns

=> Analysing non-numerical Series

In [None]:
athlete = titanic.Athlete # creating a pandas series from pandas DF

In [None]:
athlete.size
athlete.nunique(dropna=True)
athlete.value_counts()
athlete.value_counts(sort=True)
athlete.value_counts(sort=True, ascending=False, normalize=True)

Normalization rescales feature values within a predefined range, often between 0 and 1, which is particularly useful for models where the scale of features varies greatly. In contrast, standardization centers data around the mean (0) and scales it according to the standard deviation (1).

In [None]:
age.sort_index() # sort by their index number
age.sort_values() # sort by their values
age.sort_index(inplace=True) # this will make changes to the original data not a copy

In [None]:
# nlargest() & nsmalles()
# this will return the number of largest or smallest values
age.nlargest(3) # 3 largest values return
age.nsmallest(4) # 4 smallest values return

In [None]:
age.sort_values(ascending=True).iloc[:4] #start from beginning till 3 row, it is series
# what i mean, :3, from start till 3, no need for column part as it is 1d array

In [None]:
age.iloc[2] = 20
age.iloc[0] = 101  # we can assign new values to them by indexing

In [None]:
titanic[titanic.Sex == 'Male']

In [None]:
only_male = titanic.Sex == 'Male'

In [None]:
titanic[only_male].head()

In [None]:
titanic[titanic.Age > 25]

In [None]:
age = titanic.Age > 25
(only_male & age)

=> Remove Columns

In [None]:
# to remove columns use df.drop(columns='columnName')
titanic.drop(columns='PassengerId', inplace=True)

=> Remove Rows

In [None]:
rows_to_delete = list(range(0, 100))
titanic.drop(index = rows_to_delete)

In [None]:
(titanic.Year == 1996).value_counts()

In [None]:
1996 in titanic.Year.values
# True or False

In [None]:
# to create new column and add to specific location
relatives = titanic.Siblings + titanic.Parents
titanic.insert(loc=5, column='relatives', value=relatives)

=> Sorting DF with sort_index() and sort_values()

In [None]:
titanic.Age.sort_values(ascending=False)
titanic.sort_values(by=['Age', 'Siblings', 'Pclass'])

=> rank()

In [None]:
titanic.Age.rank()
titanic.Age.rank(numeric_only = True)

In [None]:
titanic.count(axis = 0) # 0 for rows
titanic.count(axis = 1) # 1 for columns

In [None]:
titanic.count(axis = 'index')
titanic.count(axis = 0, numeric_only = True) # how many rows in each column
titanic.count(axis = 1, numeric_only = True) # how many columns in each row

=> Matplotlib

In [None]:
matplotlib.is_interactive()
%matplotlib --list
matplotlib.get_backend()

In [None]:
plt.xlabel('X coordinates', fontsize=14, color='blue')
plt.ylabel('Y coordinates', fontsize=14, color='red')

In [None]:
titanic.Age.plot(label='Age', color='green', linestyle='--', linewidth=2, marker='d',
                markersize=7, )

=> Group By

In [None]:
group_by_sex = titanic.groupby('Sex')

In [None]:
group_by_age = titanic.groupby('Age').size() 

In [None]:
group_by_sex.groups

In [None]:
len(list(group_by_sex)) # 2 as gender: male and female
group_by_sex[0][0] # female
group_by_sex[1][0] # male

In [None]:
split = titanic.groupby(by=['Sex', 'Survived'])
for s in split:
    print(s[0])

=> Seaborn

In [None]:
# bar plot for categorical data
plt.figure(figsize=(5, 5))
sns.countplot(data = titanic, x = 'Sex')
sns.countplot(data = titanic, y = 'Sex') # y -> vertical
# countplot shows the number of observations in each categorical bin using bars

In [None]:
# bar plot for categorical data
plt.figure(figsize=(4, 4))
sns.countplot(data = titanic, x = 'Sex', hue='Survived') # hue -> add column

In [None]:
# bar plot for categorical data
plt.figure(figsize=(5, 5))
sns.set(font_scale=1.5, palette='viridis')
sns.countplot(data = titanic, x='Sex', hue='Pclass')

In [None]:
# categorical plots
# scatter plot kinda
sns.stripplot(data=titanic, x='Sex', y='Age', jitter=True, hue='Survived', dodge=True)

In [None]:
# kinda scatter plot
sns.swarmplot(data=titanic, x='Sex', y='Age', hue='Pclass', dodge=True)

In [None]:
# violin plot
sns.violinplot(data=titanic, x='Sex', y='Age', hue='Survived', dodge=True) # ,split=True

In [None]:
# scatter plot and violion plot on the same container or plotting area
sns.violinplot(data=titanic, x='Sex', y='Age', hue='Pclass', dodge=True)
sns.swarmplot(data=titanic, x='Sex', y='Age', hue='Pclass', dodge=True, color='black')

In [None]:
# bar plot
sns.barplot(data=titanic, x='Sex', y='Age', hue='Pclass', dodge=True)

In [None]:
# point plot
plt.figure(figsize=(5, 5))
sns.set(font_scale=1.5, palette='viridis')
sns.pointplot(data=titanic, x='Pclass', y='Age', hue='Sex', dodge=True)

In [None]:
# joint plot - linear regression model
sns.jointplot(data=titanic, x='Age', y='Fare', height = 6, kind = 'reg') #'kde', 'scatter'
# by 'reg' we can draw regression line
# up and right side you can see frequency distribution

In [None]:
# linear regression model plot
sns.lmplot(data=titanic, x='Age', y='Fare', aspect = 1, height = 6, col = 'Sex')
# sns.lmplot(data=titanic, x='Age', y='Fare', aspect = 1, height = 6, row = 'Sex')
# sns.lmplot(data=titanic, x='Age', y='Fare', aspect = 1, height = 6, hue = 'Sex')

In [None]:
# linear regression model plot
sns.lmplot(data=titanic, x='Age', y='Survived', aspect=1, height=6, 
           col='Sex', logistic=True)

In [None]:
# cross table
pd.crosstab(titanic.Sex, titanic.Survived)

In [None]:
# heatmap
sns.heatmap(pd.crosstab(titanic.Sex, titanic.Survived), annot=True, 
           fmt='d', cmap='Reds', vmax=100) 
# vmax -> to set a limit, ex: above 100 is dark red
# cmap -> color type

=> complex plotting

In [None]:
rcParams.update({"figure.autolayout":True, "axes.titlepad": 20})
plt.rcParams['figure.figsize'] = (20, 10)
title_font = {
    'family':'times new roman', 
    'color': 'darkblue',
    'weight': 'normal',
    'size': 24,
}
labels_font = {
    'family': 'consolas',
    'color': 'darkred',
    'weight': 'normal',
    'size': 20,
}
fig = plt.figure()
subplot = fig.add_subplot(1, 1, 1)
acquisition_year.plot(ax=subplot, rot=45, logy=True, grid=True) #rot will rotate years position
subplot.set_xlabel('Acquisition Year', fontdict=labels_font, labelpad=10)
subplot.set_ylabel('Artworks Acquired', fontdict=labels_font, labelpad=10)
subplot.set_title('Tate Gallery Acquisitions', fontdict=title_font)
plt.xticks(fontsize=16)
plt.yticks(fontsize=18)
subplot.locator_params(nbins=50, axis='x') 

In [None]:
# save this graph to png format
fig.savefig('user_info/plot1.png')

In [None]:
# svg format - scalable vector graphics
fig.savefig('user_info/plot2.svg')

=> Additional Plots

In [None]:
titanic.Survived.value_counts().plot(kind='bar') #hbar

In [None]:
titanic.Survived.value_counts().plot(kind='pie')

=> Save as Excel file

In [None]:
df.to_excel('artist_example.xlsx')
df.to_excel('artist_example.xlsx', index=False, columns=['Name', 'Surname', 
                                                         'Birthday', 'Employment'])

=> DF continue...

In [None]:
small_df = df.iloc[49980:50019, :].copy()
small_df

In [None]:
grouped = small_df.groupby('artist')
for name, group_df in grouped:
    print(name)

In [None]:
# First Example: Aggregation
for name, group_df in grouped:
    max_year = group_df['acquisitionYear'].max()
    print(f"{name}: {max_year}")

=> Advanced Function

In [40]:
# Filter functions
#students = ["Elbek", "Adil", "Kadir", "Boy2", "Robert", "Elis", "Elvis", 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
students = "Elbek Adil Kadir Boy Robert Norber Elvis Lis 1 2 3 4 5 6 7 8 9 0".split()
selected_students = ["Elbek", "Kadir", "Boy"]

def extract_selected_students(item):
    if item in selected_students:
        return True
    else:
        return False

students_after_selection = filter(extract_selected_students, students)
print(list(students_after_selection))

['Elbek', 'Kadir', 'Boy']


In [42]:
# Lambda Functions
# use lambda to sort...
products = [
    ("Item1", 10),
    ("Item2", 12),
    ("Item3", 3),
    ("Item4", 5),
    ("Item5", 16),
    ("Item6", 4)
]
products.sort(key=lambda item: item[1])
print(products)
# use filter with lambda to take even numbers
random_numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0]
even_numbers = filter(lambda a: (a % 2 == 0), random_numbers)
print(list(even_numbers))
# for i in random_numbers:
#     if i % 2 == 0:
#         even_numbers.append(i)
# print(even_numbers)

[('Item3', 3), ('Item6', 4), ('Item4', 5), ('Item1', 10), ('Item2', 12), ('Item5', 16)]
[2, 4, 6, 8, 0]


In [44]:
# Map Functions
fruits = ("apple", "banana", "cherry")
# calculate length of each item in fruits 
def calculate_len(item):
    return len(item)
result = map(calculate_len, fruits)
print(list(result))
results = []
for item in fruits:
    results.append(len(item))
print(results)
results = [len(item) for item in fruits]
print(results)

numbers = [1, 2, 3, 4, 5, 6, 7, 8, 9]
def calculate_square(n):
    return n * n
result_sqr = map(calculate_square, numbers)
print(list(result_sqr))

[5, 6, 6]
[5, 6, 6]
[5, 6, 6]
[1, 4, 9, 16, 25, 36, 49, 64, 81]


In [46]:
# Zip Functions
numbers = [1, 2, 3, 4, 5, 6]
names = ['Kadir', 'Adil', 'Fozil']
alpha = "A B C D E F G".strip()
result_zipped = zip(numbers, names, alpha)
print(list(result_zipped))

keys = ["name", "surname", "age", "status", "id"]
values = ["Kadir", "Abdusalomov", 22, "student", 27]
mix = zip(keys, values)
print(dict(mix))

[(1, 'Kadir', 'A'), (2, 'Adil', ' '), (3, 'Fozil', 'B')]
{'name': 'Kadir', 'surname': 'Abdusalomov', 'age': 22, 'status': 'student', 'id': 27}


In [48]:
# Arrays
from array import array
numbers = array("i", [1, 2, 3])
numbers.append(4)
print(numbers)

array('i', [1, 2, 3, 4])


In [54]:
# Generator Objects
from sys import getsizeof
# list 
numbers_as_list = [n*100 for n in range(1000000)]
# tuple
numbers_as_tuple = tuple(n*100 for n in range(1000000))
# generator expressions
numbers_as_gen_exp = (n*100 for n in range(1000000))
print(f"List size: {getsizeof(numbers_as_list)} Tuple size: {getsizeof(numbers_as_tuple)} Generator Expression size: {getsizeof(numbers_as_gen_exp)}")
# Generator Expressions are efficient while working with large volumes of data compared to other data types like lists or tuples

List size: 8448728 Tuple size: 8000040 Generator Expression size: 200


In [56]:
# Unpacking Operator
numbers = [1, 2, 3, 4, 5]
print(*numbers)
values = [*range(1, 10, 2)]
print(*values)
print(*"Mukhammadkodir")
print(*{*"Python"})
cities = "Warsaw, Berlin, London, Paris".split(',')
countries = "Poland Germany UK France".split()
info = [*cities, *countries]
print(*cities, *countries, *info)

dict_one = {"name":"Mukhammadkodir"}
dict_two = {"surname": "Abdusalomov"}
dict_three = dict(age = 22)
dict_combined = {**dict_one, **dict_two, **dict_three}
print(dict_combined)

1 2 3 4 5
1 3 5 7 9
M u k h a m m a d k o d i r
h y o P n t
Warsaw  Berlin  London  Paris Poland Germany UK France Warsaw  Berlin  London  Paris Poland Germany UK France
{'name': 'Mukhammadkodir', 'surname': 'Abdusalomov', 'age': 22}


In [None]:
# two conditions with LC
numbers = [21, 24, 56, 564, 102, 504, 79, 84]
new_nums, new_numbers = [], []

# without LC
for num in numbers:
    if num < 100:
        new_nums.append(num)
    else:
        new_nums.append(num*2)
print(new_nums)
# now with LC
new_numbers = [num if num < 100 else num * 2 for num in numbers]
print(new_numbers)
# or 
items = [item for item in products]
items = [item[0] for item in products]
new_items_new = [item[0] for item in products if item[1] > 10]

In [None]:
collection = [28, "M", 32, "H", "Parrot", "Sea Biscuit"]
print(collection[::-1])
print(collection.reverse()) # returns NONE but it worked

In [62]:
# enumerate 
letters = list("ABCDE")
for index, item in enumerate(letters):
    print(index, item)

0 A
1 B
2 C
3 D
4 E


In [64]:
# list methods to keep in mind
# .append() .insert(index, value) .pop() .pop(index) .remove(item_name)  del numbers[0:5]  .reverse() .clear() .join()
print("".join(letters))
print(" ".join(letters))
print("-".join(letters))
print(", ".join(letters))
random_values = ["Alexander", "Heroku", "Druseldor"]
print(random_values)
print(", ".join(random_values))

ABCDE
A B C D E
A-B-C-D-E
A, B, C, D, E
['Alexander', 'Heroku', 'Druseldor']
Alexander, Heroku, Druseldor


In [66]:
def sort_products(product):
    return product[1]
products.sort(key=sort_products)
print(products)

[('Item3', 3), ('Item6', 4), ('Item4', 5), ('Item1', 10), ('Item2', 12), ('Item5', 16)]


In [None]:
# Sets are mutable - values can be changed, unordered, duplicates are removed - only unique values
example_set = {1, 2, 3, 4, 5, 6, 7, 8, 9, 0}
example_set = {1, 2, "This_Set", ("London",)}
# we cannot have mutable objects in our set, here we have tuple as it is unmutable
empty_set = set()
# set from a tuple
colors = set(("red", "blue", "red", "green"))
# set from a list
colors = set(["red", "blue", "red", "green"])
# set from a dictionary
colors = set({"main_color":"red", "ad_color":"blue", "bottom_color":"red", "div_color":"green"})
colors_ndict = set(dict(main_color="red", ad_color = "blue"))

#sets are unordered that's why indexing is not allowed
colors.add("white")
colors.update("white", "silver", ("gold",))
colors.discard("silverr")
colors.remove("gold")
colors.pop()
# colors.clear()
same_color = colors # both have same reference id (storage id), one change affects both
copied_color = colors.copy()
print(id(colors)) #2339080440288
print(id(same_color)) #2339080440288
print(id(copied_color)) #2339080442976

In [None]:
# Set Operations
A = {1, 2, 3, 4, 5}
B = {4, 5, 6, 7, 8}
print(A.union(B))
print(A.intersection(B))
print(A.difference(B))
print(A.symmetric_difference(B))

In [None]:
# Set Comprehensions SC
numbers = {1, 2, 3, 4, 5, 6, 7, 8, 9}
modified = set()
modified2 = set()
# SC
modified_numbers = {num ** 2 for num in numbers}
print(modified_numbers)
# without SC
for num in numbers:
    modified.add(num**2)
print(modified)
# SC
modified_numbers = {num ** 2 for num in range(9)}
print(modified_numbers)
# without SC
for num in range(9):
    modified2.add(num**2)
print(modified2)

In [None]:
# Tuples, unchangeable elements
names, numbers, ids = (), (), ()
names = "Kadir", 
names = ("Adil",) * 2
numbers = 1, 2, 3, 4, 5, 6
numbers = (1, 2, 3, 4, 5, 6)

# we cannot delete individual items but we can delete tuple entirely 
#del names, numbers
#.count(), .index()
print("Adil" in names)

In [None]:
# Exception Handling
import sys

try:
    pass
except:
    pass
else:
    pass
finally:
    pass

try:
    age = int(input("age: "))
except ValueError as value_error:
    print("Value Error!", "......", value_error, "......", type(value_error))
else:
    print("Success! No exceptions here")

In [None]:
class Car:
    is_new = True
    is_used = False
    def __init__(self, name=None, brand=None, color=None, year=None):
        self.name = name if name else "Default Name"
        self.brand = brand if brand else "Default Brand"
        self.color = color if color else "Default Color"
        self.year = year if year else 2024
    def get_car_info(self):
        return f"This is {self.name} manufactured by {self.brand} in {self.year}"
tesla_s1 = Car()
tesla_s1.is_electric = True
print(f"{tesla_s1.get_car_info()} and operates in electricity" if tesla_s1.is_electric else f"This car is not electric")

In [None]:
import math
import statistics

class Calc:
    mem_list = []
    def __init__(self, name=None, producer=None, color=None):
        self.name = name if name else "Default"
        self.producer = producer if name else "Default"
        self.color = color if color else "Default"
    def add(self, a, b):
        self.mem_list.append(a+b)
        return a + b
    def substract(self, a, b):
        self.mem_list.append(a-b)
    def divide(self, a, b):
        self.mem_list.append(a//b)
    def multiply(self, a, b):
        self.mem_list.append(a*b)
    def __str__(self):
        return f"Here are the results: {self.name} {self.producer}, {self.color}"
    def print_me(self):
        [i for i in self.mem_list]
        # for i in self.mem_list:
        #     print(i)

In [None]:
class Scientific_calc(Calc):
    def __init__(self, name, producer, color):
        Calc.__init__(self, name, producer, color)
    def log(self, a):
        self.mem_list.append(math.log(a))
        return math.log(a)
    def pow(self, a, b):
        self.mem_list.append(a**b)
        return a**b

In [None]:
class ExtraCalc(Calc):
    def __init__(self, name, producer, color, owner_name):
        Calc.__init__(self, name, producer, color)
        self.owner_name = owner_name
    def get_min_result(self):
        return min(self.mem_list)
    def get_mean_result(self):
        return statistics.mean(self.mem_list)
    def get_std_result(self):
        return statistics.stdev(self.mem_list)

In [None]:
random_value = Calc("Hmm", "Hmm", "red")
random_value.add(5, 5)
random_value.substract(5, 5)
random_value.divide(5, 5)
random_value.multiply(5, 5)
random_value.print_me()
rr_val = Scientific_calc("Hmm", "Hmm", "red")
print(rr_val.log(5))
print(rr_val.pow(5, 5))
r_val = ExtraCalc("Hmm", "Hmm", "Hmm", "Hmm")
print(r_val.get_min_result())
print(r_val.get_mean_result())
print(r_val.get_std_result())

In [None]:
class Experiment(ExtraCalc, Calc):
    def __init__(self, owner_name, name, producer, color, production_year=None, quantity=None):
        ExtraCalc.__init__(self, owner_name)
        Calc.__init__(self, name, producer, color)
        self.production_year = production_year if production_year else 2024
        self.quantity = quantity if quantity else 0
    def __str__(self):
        return f"Results: {self.name} produced in {self.production_year}"
    @classmethod
    def specifics(cls, name):
        return ""

In [None]:
expr = Experiment.specifics("Tesla Inc")

In [None]:
nums = list(range(1, 15))
[n for n in nums]

=> Standard Library + Paths

In [72]:
# Path() object represents the current folder
from pathlib import Path
path = Path()
print(path)

.


In [76]:
# getting the come directory of the current user
print(Path.home())
Path.home()

C:\Users\khali


WindowsPath('C:/Users/khali')

In [None]:
print(path.exists())
print(path.is_file())
print(path.is_dir())
print(path.name)
print(path.stem)
print(path.suffix)
print(path.parent)
# with_name
# with_suffix

In [74]:
# Directories
temp_path = Path.cwd()
print(temp_path)

C:\Users\khali\Desktop\Jupyter\00_Initial_Trial


In [None]:
# mkdir() rmdir() rename() iterdir() cwd() 
# iterdir() used to get the list of files and directories at this path.
# -> use for loop or else it returns generator object

In [78]:
for files in temp_path.iterdir():
    print(files)

C:\Users\khali\Desktop\Jupyter\00_Initial_Trial\.ipynb_checkpoints
C:\Users\khali\Desktop\Jupyter\00_Initial_Trial\01_Initial_Trial
C:\Users\khali\Desktop\Jupyter\00_Initial_Trial\02_Initial_Trial
C:\Users\khali\Desktop\Jupyter\00_Initial_Trial\Collections.ipynb
C:\Users\khali\Desktop\Jupyter\00_Initial_Trial\Data_Science
C:\Users\khali\Desktop\Jupyter\00_Initial_Trial\Python_SQL


In [82]:
existing_data = [data for data in temp_path.iterdir()]
print(existing_data)

In [None]:
only_files = [file for file in temp_path.iterdir() if file.is_file()]
print(only_files)

In [None]:
only_folders = [folder for folder in temp_path.iterdir() if folder.is_dir()]
print(only_folders)

In [None]:
python_files = [file for file in temp_path.glob("*.py")]
print(python_files)

In [None]:
all_existing_files = [file for file in temp_path.rglob("*.*")]
print(all_existing_files)

In [86]:
import random
print(random.randint(1, 50))

49


In [88]:
print(random.choice(["Python", "R", "SQL"]))

R


In [90]:
print(random.choices([1, 2, 3, 4, 5, 6, 7, 8, 9, 0], k=5))

[4, 3, 2, 2, 1]


In [None]:
random.shuffle(numbers)

In [None]:
# creating a list of random integers using random.randint()
random_list = [random.randint(1, 50) for i in range(10)]
print(random_list)

In [None]:
# creating a list of random values using random.sample()
random_list = [random.sample(range(1, 50), 10)]
print(random_list)

In [None]:
import webbrowser
webbrowser.open("http://google.com")

=> NumPy functions

In [None]:
# loading data from external files using numpy
salaries = np.loadtxt('data/salary.csv', delimiter=',')
salaries.shape

In [None]:
mean = np.mean(salaries)
median = np.median(salaries)
std = np.std(salaries)
variance = np.var(salaries)

In [None]:
import sys
test_scores = np.array(salaries)
np.set_printoptions(threshold=sys.maxsize)
test_scores