Skip to content

Commit

Permalink
formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
ManojBableshwar committed May 9, 2023
1 parent d26188e commit afa2be0
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 10 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,9 @@
)

train_df = pd.read_json(os.path.join(args.download_dir, "train.jsonl"), lines=True)
validation_df = pd.read_json(os.path.join(args.download_dir, "validation.jsonl"), lines=True)
validation_df = pd.read_json(
os.path.join(args.download_dir, "validation.jsonl"), lines=True
)
# this dataset doesn't have test data, so split the validation_df into test_df and validation_df
test_df = validation_df.sample(frac=0.5, random_state=42)
validation_df.drop(test_df.index, inplace=True)
Expand All @@ -52,13 +54,15 @@

# save 20% of the rows from the dataframes into files with small_ prefix in the ./news-summary-dataset folder
train_df.sample(frac=0.2).to_json(
os.path.join(args.download_dir,"small_train.jsonl"), orient="records", lines=True
os.path.join(args.download_dir, "small_train.jsonl"), orient="records", lines=True
)
validation_df.sample(frac=0.2).to_json(
os.path.join(args.download_dir,"small_validation.jsonl"), orient="records", lines=True
os.path.join(args.download_dir, "small_validation.jsonl"),
orient="records",
lines=True,
)
test_df.sample(frac=0.2).to_json(
os.path.join(args.download_dir,"small_test.jsonl"), orient="records", lines=True
os.path.join(args.download_dir, "small_test.jsonl"), orient="records", lines=True
)


Expand All @@ -68,7 +72,7 @@
import json

test_df = pd.read_json(
os.path.join(args.download_dir,"small_test.jsonl"), orient="records", lines=True
os.path.join(args.download_dir, "small_test.jsonl"), orient="records", lines=True
)
# take 1 random sample
test_df = test_df.sample(n=1)
Expand All @@ -79,5 +83,5 @@
# create a json object with the key "inputs" whose value is a dict mapping "input_string" to the list of values from the article column of the test dataframe
test_json = {"inputs": {"input_string": test_df["article"].tolist()}}
# save the json object to a file named sample_score.json in the download directory (args.download_dir)
with open(os.path.join(args.download_dir,"sample_score.json"), "w") as f:
with open(os.path.join(args.download_dir, "sample_score.json"), "w") as f:
json.dump(test_json, f)
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,9 @@
"except:\n",
" workspace_ml_client = MLClient(\n",
" credential,\n",
" subscription_id = \"<SUBSCRIPTION_ID>\",\n",
" resource_group_name = \"<RESOURCE_GROUP>\",\n",
" workspace_name = \"WORKSPACE_NAME>\",\n",
" subscription_id=\"<SUBSCRIPTION_ID>\",\n",
" resource_group_name=\"<RESOURCE_GROUP>\",\n",
" workspace_name=\"WORKSPACE_NAME>\",\n",
" )\n",
"\n",
"# the models, fine tuning pipelines and environments are available in the AzureML system registry, \"azureml-preview\"\n",
Expand Down Expand Up @@ -188,7 +188,8 @@
"source": [
"# download the dataset using the helper script. This needs datasets library: https://pypi.org/project/datasets/\n",
"import os\n",
"exit_status=os.system(\"python ./download-dataset.py\")\n",
"\n",
"exit_status = os.system(\"python ./download-dataset.py\")\n",
"if exit_status != 0:\n",
" raise Exception(\"Error downloading dataset\")"
]
Expand Down

0 comments on commit afa2be0

Please sign in to comment.